From 3e40f0b9c3f6c9ebde0a89e35cddaf3405292458 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 7 Dec 2018 15:18:26 +0000 Subject: [PATCH 01/59] placer1: New cost calculation infrastructure Signed-off-by: David Shah --- common/placer1.cc | 104 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 1 deletion(-) diff --git a/common/placer1.cc b/common/placer1.cc index 5b72602f..32986e37 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -47,6 +47,14 @@ NEXTPNR_NAMESPACE_BEGIN class SAPlacer { + private: + struct BoundingBox + { + int x0 = 0, x1 = 0, y0 = 0, y1 = 0; + bool includes(int x, int y) const { return x >= x0 && x <= x1 && y >= y0 && y <= y1; } + wirelen_t hpwl() const { return wirelen_t((x1 - x0) + (y1 - y0)); } + }; + public: SAPlacer(Context *ctx, Placer1Cfg cfg) : ctx(ctx), cfg(cfg) { @@ -494,10 +502,104 @@ class SAPlacer } } + // Return true if a net is to be entirely ignored + inline bool ignore_net(NetInfo *net) + { + return net->driver.cell == nullptr || net->driver.cell->bel == BelId() || + ctx->getBelGlobalBuf(net->driver.cell->bel); + } + + // Get the bounding box for a net + inline BoundingBox get_net_bounds(NetInfo *net) + { + BoundingBox bb; + NPNR_ASSERT(net->driver.cell != nullptr); + Loc dloc = ctx->getBelLocation(net->driver.cell->bel); + bb.x0 = dloc.x; + bb.x1 = dloc.x; + bb.y0 = dloc.y; + bb.y1 = dloc.y; + + for (auto user : net->users) { + if (user.cell->bel == BelId()) + continue; + Loc uloc = ctx->getBelLocation(user.cell->bel); + bb.x0 = std::min(bb.x0, uloc.x); + bb.x1 = std::max(bb.x1, uloc.x); + bb.y0 = std::min(bb.y0, uloc.y); + bb.y1 = std::max(bb.y1, uloc.y); + } + + return bb; + } + + // Get the timing cost for an arc of a net + inline double get_timing_cost(NetInfo *net, size_t user) + { + int cc; + if (net->driver.cell == nullptr) + return 0; + if (ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc) == TMG_IGNORE) + return 0; + auto crit = net_crit.find(net->name); + if (crit 
== net_crit.end() || crit->second.criticality.empty()) + return 0; + double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); + return delay * std::pow(crit->second.criticality.at(user), crit_exp); + } + + // Set up the cost maps + void setup_costs() + { + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + if (ignore_net(ni)) + continue; + net_bounds[ni->name] = get_net_bounds(ni); + net_arc_tcost[ni->name].resize(ni->users.size()); + for (size_t i = 0; i < ni->users.size(); i++) + net_arc_tcost[ni->name][i] = get_timing_cost(ni, i); + } + } + + // Get the total wiring cost for the design + wirelen_t total_wirelen_cost() + { + wirelen_t cost = 0; + for (const auto &net : net_bounds) + cost += net.second.hpwl(); + return cost; + } + + // Get the total timing cost for the design + double total_delay_cost() + { + double cost = 0; + for (const auto &net : net_arc_tcost) { + for (auto arc_cost : net.second) { + cost += arc_cost; + } + } + return cost; + } + + // Map nets to their bounding box (so we can skip recompute for moves that do not exceed the bounds + std::unordered_map net_bounds; + // Map net arcs to their timing cost (criticality * delay ns) + std::unordered_map> net_arc_tcost; + + // Wirelength and timing cost at last and current iteration + wirelen_t last_wirelen_cost, curr_wirelen_cost; + double last_timing_cost, curr_timing_cost; + + // Criticality data from timing analysis + NetCriticalityMap net_crit; + Context *ctx; - wirelen_t curr_metric = std::numeric_limits::max(); float curr_tns = 0; float temp = 1000; + float crit_exp = 8; + float lambda = 0.5; bool improved = false; int n_move, n_accept; int diameter = 35, max_x = 1, max_y = 1; From f8f89cea71b4ea413d1613c2ce86c10dcfbbbd7c Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 7 Dec 2018 16:45:14 +0000 Subject: [PATCH 02/59] placer1: Rework to use new criticality-based weighted cost function Signed-off-by: David Shah --- common/placer1.cc | 236 
+++++++++++++++++++++++++++++----------------- 1 file changed, 148 insertions(+), 88 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 32986e37..9b5352e0 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -42,6 +42,19 @@ #include "place_common.h" #include "timing.h" #include "util.h" +namespace std { + template <> struct hash> + { + std::size_t + operator()(const std::pair &idp) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } + }; +} NEXTPNR_NAMESPACE_BEGIN @@ -51,7 +64,8 @@ class SAPlacer struct BoundingBox { int x0 = 0, x1 = 0, y0 = 0, y1 = 0; - bool includes(int x, int y) const { return x >= x0 && x <= x1 && y >= y0 && y <= y1; } + bool is_inside_inc(int x, int y) const { return x >= x0 && x <= x1 && y >= y0 && y <= y1; } + bool touches_bounds(int x, int y) const { return x == x0 || x == x1 || y == y0 || y == y1; } wirelen_t hpwl() const { return wirelen_t((x1 - x0) + (y1 - y0)); } }; @@ -86,20 +100,10 @@ class SAPlacer } diameter = std::max(max_x, max_y) + 1; - costs.resize(ctx->nets.size()); - old_udata.reserve(ctx->nets.size()); - decltype(NetInfo::udata) n = 0; - for (auto &net : ctx->nets) { - old_udata.emplace_back(net.second->udata); - net.second->udata = n++; - } + build_port_index(); } - ~SAPlacer() - { - for (auto &net : ctx->nets) - net.second->udata = old_udata[net.second->udata]; - } + ~SAPlacer() {} bool place() { @@ -179,18 +183,19 @@ class SAPlacer auto saplace_start = std::chrono::high_resolution_clock::now(); log_info("Running simulated annealing placer.\n"); - // Calculate metric after initial placement - curr_metric = 0; - curr_tns = 0; - for (auto &net : ctx->nets) { - wirelen_t wl = get_net_metric(ctx, net.second.get(), MetricType::COST, curr_tns); - costs[net.second->udata] = CostChange{wl, -1}; - curr_metric += wl; - } + // Invoke timing analysis to obtain criticalities + get_criticalities(ctx, 
&net_crit); + + // Calculate costs after initial placement + setup_costs(); + curr_wirelen_cost = total_wirelen_cost(); + curr_timing_cost = total_timing_cost(); + last_wirelen_cost = curr_wirelen_cost; + last_timing_cost = curr_timing_cost; + + double avg_metric = curr_metric(), min_metric = curr_metric(); int n_no_progress = 0; - wirelen_t min_metric = curr_metric; - double avg_metric = curr_metric; temp = 10000; // Main simulated annealing loop @@ -199,9 +204,9 @@ class SAPlacer improved = false; if (iter % 5 == 0 || iter == 1) - log_info(" at iteration #%d: temp = %f, cost = " - "%.0f, est tns = %.02fns\n", - iter, temp, double(curr_metric), curr_tns); + log_info(" at iteration #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f est tns = %.02fns\n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), curr_tns); for (int m = 0; m < 15; ++m) { // Loop through all automatically placed cells @@ -215,8 +220,8 @@ class SAPlacer } } - if (curr_metric < min_metric) { - min_metric = curr_metric; + if (curr_metric() < min_metric) { + min_metric = curr_metric(); improved = true; } @@ -227,8 +232,9 @@ class SAPlacer n_no_progress++; if (temp <= 1e-3 && n_no_progress >= 5) { - if (iter % 5 != 0) - log_info(" at iteration #%d: temp = %f, cost = %f\n", iter, temp, double(curr_metric)); + log_info(" at iteration #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f est tns = %.02fns\n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), curr_tns); break; } @@ -238,8 +244,8 @@ class SAPlacer double upper = 0.6, lower = 0.4; - if (curr_metric < 0.95 * avg_metric && curr_metric > 0) { - avg_metric = 0.8 * avg_metric + 0.2 * curr_metric; + if (curr_metric() < 0.95 * avg_metric && curr_metric > 0) { + avg_metric = 0.8 * avg_metric + 0.2 * curr_metric(); } else { if (Raccept >= 0.8) { temp *= 0.7; @@ -281,16 +287,14 @@ class SAPlacer assign_budget(ctx, true /* quiet */); } + // Invoke timing analysis to obtain criticalities + 
get_criticalities(ctx, &net_crit); + // Need to rebuild costs after criticalities change + setup_costs(); // Recalculate total metric entirely to avoid rounding errors // accumulating over time - curr_metric = 0; - curr_tns = 0; - for (auto &net : ctx->nets) { - wirelen_t wl = get_net_metric(ctx, net.second.get(), MetricType::COST, curr_tns); - costs[net.second->udata] = CostChange{wl, -1}; - curr_metric += wl; - } - + curr_wirelen_cost = total_wirelen_cost(); + curr_timing_cost = total_timing_cost(); // Let the UI show visualization updates. ctx->yield(); } @@ -381,8 +385,7 @@ class SAPlacer // Attempt a SA position swap, return true on success or false on failure bool try_swap_position(CellInfo *cell, BelId newBel) { - static std::vector updates; - updates.clear(); + moveChange.reset(); BelId oldBel = cell->bel; CellInfo *other_cell = ctx->getBoundBelCell(newBel); if (other_cell != nullptr && other_cell->belStrength > STRENGTH_WEAK) { @@ -392,31 +395,16 @@ class SAPlacer int new_dist; if (other_cell != nullptr) old_dist += get_constraints_distance(ctx, other_cell); - wirelen_t new_metric = 0, delta; + double delta = 0; ctx->unbindBel(oldBel); if (other_cell != nullptr) { ctx->unbindBel(newBel); } - for (const auto &port : cell->ports) { - if (port.second.net != nullptr) { - auto &cost = costs[port.second.net->udata]; - if (cost.new_cost == 0) - continue; - cost.new_cost = 0; - updates.emplace_back(port.second.net); - } - } + add_move_cell(moveChange, cell, oldBel); if (other_cell != nullptr) { - for (const auto &port : other_cell->ports) - if (port.second.net != nullptr) { - auto &cost = costs[port.second.net->udata]; - if (cost.new_cost == 0) - continue; - cost.new_cost = 0; - updates.emplace_back(port.second.net); - } + add_move_cell(moveChange, other_cell, newBel); } ctx->bindBel(newBel, cell, STRENGTH_WEAK); @@ -431,22 +419,14 @@ class SAPlacer goto swap_fail; } - new_metric = curr_metric; - // Recalculate metrics for all nets touched by the peturbation - for 
(const auto &net : updates) { - auto &c = costs[net->udata]; - new_metric -= c.curr_cost; - float temp_tns = 0; - wirelen_t net_new_wl = get_net_metric(ctx, net, MetricType::COST, temp_tns); - new_metric += net_new_wl; - c.new_cost = net_new_wl; - } + compute_cost_changes(moveChange); new_dist = get_constraints_distance(ctx, cell); if (other_cell != nullptr) new_dist += get_constraints_distance(ctx, other_cell); - delta = new_metric - curr_metric; + delta = lambda * (moveChange.timing_delta / last_timing_cost) + + (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); delta += (cfg.constraintWeight / temp) * (new_dist - old_dist); n_move++; // SA acceptance criterea @@ -458,20 +438,13 @@ class SAPlacer ctx->unbindBel(newBel); goto swap_fail; } - curr_metric = new_metric; - for (const auto &net : updates) { - auto &c = costs[net->udata]; - c = CostChange{c.new_cost, -1}; - } - + commit_cost_changes(moveChange); return true; swap_fail: ctx->bindBel(oldBel, cell, STRENGTH_WEAK); if (other_cell != nullptr) { ctx->bindBel(newBel, other_cell, STRENGTH_WEAK); } - for (const auto &net : updates) - costs[net->udata].new_cost = -1; return false; } @@ -572,7 +545,7 @@ class SAPlacer } // Get the total timing cost for the design - double total_delay_cost() + double total_timing_cost() { double cost = 0; for (const auto &net : net_arc_tcost) { @@ -583,11 +556,106 @@ class SAPlacer return cost; } + // Cost-change-related data for a move + struct MoveChangeData + { + std::unordered_set bounds_changed_nets; + std::unordered_set> changed_arcs; + + std::unordered_map new_net_bounds; + std::unordered_map, double> new_arc_costs; + + wirelen_t wirelen_delta = 0; + double timing_delta = 0; + + void reset() + { + bounds_changed_nets.clear(); + changed_arcs.clear(); + new_net_bounds.clear(); + new_arc_costs.clear(); + } + + } moveChange; + + void add_move_cell(MoveChangeData &mc, CellInfo *cell, BelId old_bel) + { + Loc curr_loc = ctx->getBelLocation(cell->bel); + Loc 
old_loc = ctx->getBelLocation(old_bel); + // Check net bounds + for (const auto &port : cell->ports) { + NetInfo *pn = port.second.net; + if (pn == nullptr) + continue; + if (ignore_net(pn)) + continue; + const BoundingBox &curr_bounds = net_bounds[pn->name]; + // If the old location was at the edge of the bounds, or the new location exceeds the bounds, + // an update is needed + if (curr_bounds.touches_bounds(old_loc.x, old_loc.y) || !curr_bounds.is_inside_inc(curr_loc.x, curr_loc.y)) + mc.bounds_changed_nets.insert(pn->name); + // Output ports - all arcs change timing + if (port.second.type == PORT_OUT) { + int cc; + TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); + if (cls != TMG_IGNORE) + for (size_t i = 0; i < pn->users.size(); i++) + mc.changed_arcs.insert(std::make_pair(pn->name, i)); + } else if (port.second.type == PORT_IN) { + mc.changed_arcs.insert(std::make_pair(pn->name, fast_port_to_user.at(&port.second))); + } + } + } + + void compute_cost_changes(MoveChangeData &md) + { + for (const auto &bc : md.bounds_changed_nets) { + wirelen_t old_hpwl = net_bounds.at(bc).hpwl(); + auto bounds = get_net_bounds(ctx->nets.at(bc).get()); + md.new_net_bounds[bc] = bounds; + md.wirelen_delta += (bounds.hpwl() - old_hpwl); + } + + for (const auto &tc : md.changed_arcs) { + double old_cost = net_arc_tcost.at(tc.first).at(tc.second); + double new_cost = get_timing_cost(ctx->nets.at(tc.first).get(), tc.second); + md.new_arc_costs[tc] = new_cost; + md.timing_delta += (new_cost - old_cost); + } + } + + void commit_cost_changes(MoveChangeData &md) + { + for (const auto &bc : md.new_net_bounds) + net_bounds[bc.first] = bc.second; + for (const auto &tc : md.new_arc_costs) + net_arc_tcost[tc.first.first].at(tc.first.second) = tc.second; + curr_wirelen_cost += md.wirelen_delta; + curr_timing_cost += md.timing_delta; + } + // Build the cell port -> user index + void build_port_index() + { + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + 
for (size_t i = 0; i < ni->users.size(); i++) { + auto &usr = ni->users.at(i); + fast_port_to_user[&(usr.cell->ports.at(usr.port))] = i; + } + } + } + + // Get the combined wirelen/timing metric + inline double curr_metric() { return lambda * curr_timing_cost + (1 - lambda) * curr_wirelen_cost; } + // Map nets to their bounding box (so we can skip recompute for moves that do not exceed the bounds std::unordered_map net_bounds; // Map net arcs to their timing cost (criticality * delay ns) std::unordered_map> net_arc_tcost; + // Fast lookup for cell port to net user index + std::unordered_map fast_port_to_user; + // Wirelength and timing cost at last and current iteration wirelen_t last_wirelen_cost, curr_wirelen_cost; double last_timing_cost, curr_timing_cost; @@ -611,14 +679,6 @@ class SAPlacer const float post_legalise_temp = 10; const float post_legalise_dia_scale = 1.5; Placer1Cfg cfg; - - struct CostChange - { - wirelen_t curr_cost; - wirelen_t new_cost; - }; - std::vector costs; - std::vector old_udata; }; Placer1Cfg::Placer1Cfg(Context *ctx) : Settings(ctx) From 2ef2d2b2cb7cb2e54f8a0d8d5476e361ec5e68ea Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 7 Dec 2018 18:18:24 +0000 Subject: [PATCH 03/59] placer1: Bugfixes Signed-off-by: David Shah --- common/placer1.cc | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 9b5352e0..baddf4eb 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -193,7 +193,8 @@ class SAPlacer last_wirelen_cost = curr_wirelen_cost; last_timing_cost = curr_timing_cost; - double avg_metric = curr_metric(), min_metric = curr_metric(); + wirelen_t avg_wirelen = curr_wirelen_cost; + wirelen_t min_wirelen = curr_wirelen_cost; int n_no_progress = 0; temp = 10000; @@ -220,8 +221,8 @@ class SAPlacer } } - if (curr_metric() < min_metric) { - min_metric = curr_metric(); + if (curr_wirelen_cost < min_wirelen) { + min_wirelen = curr_wirelen_cost; 
improved = true; } @@ -243,9 +244,9 @@ class SAPlacer int M = std::max(max_x, max_y) + 1; double upper = 0.6, lower = 0.4; - - if (curr_metric() < 0.95 * avg_metric && curr_metric > 0) { - avg_metric = 0.8 * avg_metric + 0.2 * curr_metric(); + + if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { + avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost; } else { if (Raccept >= 0.8) { temp *= 0.7; @@ -295,6 +296,8 @@ class SAPlacer // accumulating over time curr_wirelen_cost = total_wirelen_cost(); curr_timing_cost = total_timing_cost(); + last_wirelen_cost = curr_wirelen_cost; + last_timing_cost = curr_timing_cost; // Let the UI show visualization updates. ctx->yield(); } @@ -401,17 +404,18 @@ class SAPlacer ctx->unbindBel(newBel); } + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + + if (other_cell != nullptr) { + ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); + } + add_move_cell(moveChange, cell, oldBel); if (other_cell != nullptr) { add_move_cell(moveChange, other_cell, newBel); } - ctx->bindBel(newBel, cell, STRENGTH_WEAK); - - if (other_cell != nullptr) { - ctx->bindBel(oldBel, other_cell, STRENGTH_WEAK); - } if (!ctx->isBelLocationValid(newBel) || ((other_cell != nullptr && !ctx->isBelLocationValid(oldBel)))) { ctx->unbindBel(newBel); if (other_cell != nullptr) @@ -430,7 +434,7 @@ class SAPlacer delta += (cfg.constraintWeight / temp) * (new_dist - old_dist); n_move++; // SA acceptance criterea - if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { + if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x0fffffff)) <= std::exp(-delta / temp))) { n_accept++; } else { if (other_cell != nullptr) From 3938ea41fc00afc441c411c3de0403d4652d08c4 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 7 Dec 2018 20:54:41 +0000 Subject: [PATCH 04/59] placer1: Fix cost delta calculation Signed-off-by: David Shah --- common/placer1.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/common/placer1.cc b/common/placer1.cc index baddf4eb..fe56b4af 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -431,7 +431,7 @@ class SAPlacer new_dist += get_constraints_distance(ctx, other_cell); delta = lambda * (moveChange.timing_delta / last_timing_cost) + (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); - delta += (cfg.constraintWeight / temp) * (new_dist - old_dist); + delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost; n_move++; // SA acceptance criterea if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x0fffffff)) <= std::exp(-delta / temp))) { @@ -578,6 +578,8 @@ class SAPlacer changed_arcs.clear(); new_net_bounds.clear(); new_arc_costs.clear(); + wirelen_delta = 0; + timing_delta = 0; } } moveChange; From a218c27305c9be824531195d99d68c6f86782830 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 7 Dec 2018 21:23:34 +0000 Subject: [PATCH 05/59] placer1: Tuning Signed-off-by: David Shah --- common/placer1.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index fe56b4af..f78bf16c 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -206,8 +206,8 @@ class SAPlacer if (iter % 5 == 0 || iter == 1) log_info(" at iteration #%d: temp = %f, timing cost = " - "%.0f, wirelen = %.0f est tns = %.02fns\n", - iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), curr_tns); + "%.0f, wirelen = %.0f\n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost)); for (int m = 0; m < 15; ++m) { // Loop through all automatically placed cells @@ -234,8 +234,8 @@ class SAPlacer if (temp <= 1e-3 && n_no_progress >= 5) { log_info(" at iteration #%d: temp = %f, timing cost = " - "%.0f, wirelen = %.0f est tns = %.02fns\n", - iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), curr_tns); + "%.0f, wirelen = %.0f \n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost)); break; } @@ -434,7 +434,7 
@@ class SAPlacer delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost; n_move++; // SA acceptance criterea - if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x0fffffff)) <= std::exp(-delta / temp))) { + if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x0fffffff)) <= std::exp(-100*delta / temp))) { n_accept++; } else { if (other_cell != nullptr) @@ -670,7 +670,6 @@ class SAPlacer NetCriticalityMap net_crit; Context *ctx; - float curr_tns = 0; float temp = 1000; float crit_exp = 8; float lambda = 0.5; From 0d80181c5e547aad165ee35a1d1a6fc85505bb25 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 8 Dec 2018 13:59:10 +0000 Subject: [PATCH 06/59] placer1: Make budget-based placement an option Signed-off-by: David Shah --- common/command.cc | 5 +++++ common/placer1.cc | 51 +++++++++++++++++++++++++++-------------------- common/placer1.h | 1 + 3 files changed, 35 insertions(+), 22 deletions(-) diff --git a/common/command.cc b/common/command.cc index 1399efdb..dd351c0d 100644 --- a/common/command.cc +++ b/common/command.cc @@ -122,6 +122,8 @@ po::options_description CommandHandler::getGeneralOptions() general.add_options()("randomize-seed,r", "randomize seed value for random number generator"); general.add_options()("slack_redist_iter", po::value(), "number of iterations between slack redistribution"); general.add_options()("cstrweight", po::value(), "placer weighting for relative constraint satisfaction"); + general.add_options()("placer-budgets", "use budget rather than criticality in placer timing weights"); + general.add_options()("pack-only", "pack design only without placement or routing"); general.add_options()("ignore-loops", "ignore combinational loops in timing analysis"); @@ -187,6 +189,9 @@ void CommandHandler::setupContext(Context *ctx) settings->set("placer1/constraintWeight", vm["cstrweight"].as()); } + if (vm.count("placer-budgets")) { + settings->set("placer1/budgetBased", true); + } if (vm.count("freq")) { auto 
freq = vm["freq"].as(); if (freq > 0) diff --git a/common/placer1.cc b/common/placer1.cc index f78bf16c..64c948a3 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -43,18 +43,17 @@ #include "timing.h" #include "util.h" namespace std { - template <> struct hash> +template <> struct hash> +{ + std::size_t operator()(const std::pair &idp) const noexcept { - std::size_t - operator()(const std::pair &idp) const noexcept - { - std::size_t seed = 0; - boost::hash_combine(seed, hash()(idp.first)); - boost::hash_combine(seed, hash()(idp.second)); - return seed; - } - }; -} + std::size_t seed = 0; + boost::hash_combine(seed, hash()(idp.first)); + boost::hash_combine(seed, hash()(idp.second)); + return seed; + } +}; +} // namespace std NEXTPNR_NAMESPACE_BEGIN @@ -175,7 +174,7 @@ class SAPlacer if ((placed_cells - constr_placed_cells) % 500 != 0) log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), int(autoplaced.size())); - if (ctx->slack_redist_iter > 0) + if (cfg.budgetBased && ctx->slack_redist_iter > 0) assign_budget(ctx); ctx->yield(); auto iplace_end = std::chrono::high_resolution_clock::now(); @@ -184,7 +183,8 @@ class SAPlacer log_info("Running simulated annealing placer.\n"); // Invoke timing analysis to obtain criticalities - get_criticalities(ctx, &net_crit); + if (!cfg.budgetBased) + get_criticalities(ctx, &net_crit); // Calculate costs after initial placement setup_costs(); @@ -280,16 +280,17 @@ class SAPlacer ctx->shuffle(autoplaced); // Legalisation is a big change so force a slack redistribution here - if (ctx->slack_redist_iter > 0) + if (ctx->slack_redist_iter > 0 && cfg.budgetBased) assign_budget(ctx, true /* quiet */); } require_legal = false; - } else if (ctx->slack_redist_iter > 0 && iter % ctx->slack_redist_iter == 0) { + } else if (cfg.budgetBased && ctx->slack_redist_iter > 0 && iter % ctx->slack_redist_iter == 0) { assign_budget(ctx, true /* quiet */); } // Invoke timing analysis to obtain criticalities 
- get_criticalities(ctx, &net_crit); + if (!cfg.budgetBased) + get_criticalities(ctx, &net_crit); // Need to rebuild costs after criticalities change setup_costs(); // Recalculate total metric entirely to avoid rounding errors @@ -434,7 +435,7 @@ class SAPlacer delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost; n_move++; // SA acceptance criterea - if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x0fffffff)) <= std::exp(-100*delta / temp))) { + if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x0fffffff)) <= std::exp(-100 * delta / temp))) { n_accept++; } else { if (other_cell != nullptr) @@ -518,11 +519,16 @@ class SAPlacer return 0; if (ctx->getPortTimingClass(net->driver.cell, net->driver.port, cc) == TMG_IGNORE) return 0; - auto crit = net_crit.find(net->name); - if (crit == net_crit.end() || crit->second.criticality.empty()) - return 0; - double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); - return delay * std::pow(crit->second.criticality.at(user), crit_exp); + if (cfg.budgetBased) { + double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); + return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget))); + } else { + auto crit = net_crit.find(net->name); + if (crit == net_crit.end() || crit->second.criticality.empty()) + return 0; + double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); + return delay * std::pow(crit->second.criticality.at(user), crit_exp); + } } // Set up the cost maps @@ -690,6 +696,7 @@ Placer1Cfg::Placer1Cfg(Context *ctx) : Settings(ctx) { constraintWeight = get("placer1/constraintWeight", 10); minBelsForGridPick = get("placer1/minBelsForGridPick", 64); + budgetBased = get("placer1/budgetBased", false); } bool placer1(Context *ctx, Placer1Cfg cfg) diff --git a/common/placer1.h b/common/placer1.h index 7305f4b1..2c3808f0 100644 --- a/common/placer1.h +++ b/common/placer1.h @@ -29,6 +29,7 @@ struct Placer1Cfg 
: public Settings Placer1Cfg(Context *ctx); float constraintWeight; int minBelsForGridPick; + bool budgetBased; }; extern bool placer1(Context *ctx, Placer1Cfg cfg); From 3650c8a0e74f9072813ecd085955a584264f5a76 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 9 Dec 2018 10:38:08 +0000 Subject: [PATCH 07/59] placer1: Tweaking temperature Signed-off-by: David Shah --- common/placer1.cc | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 64c948a3..9b8b4de0 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -197,7 +197,7 @@ class SAPlacer wirelen_t min_wirelen = curr_wirelen_cost; int n_no_progress = 0; - temp = 10000; + temp = 1; // Main simulated annealing loop for (int iter = 1;; iter++) { @@ -232,7 +232,7 @@ class SAPlacer else n_no_progress++; - if (temp <= 1e-3 && n_no_progress >= 5) { + if (temp <= 1e-7 && n_no_progress >= 5) { log_info(" at iteration #%d: temp = %f, timing cost = " "%.0f, wirelen = %.0f \n", iter, temp, double(curr_timing_cost), double(curr_wirelen_cost)); @@ -244,8 +244,13 @@ class SAPlacer int M = std::max(max_x, max_y) + 1; double upper = 0.6, lower = 0.4; - - if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { + + if (ctx->verbose) + log("iter #%d: temp = %f, timing cost = " + "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), diameter, Raccept); + + if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost; } else { if (Raccept >= 0.8) { @@ -276,7 +281,7 @@ class SAPlacer autoplaced.push_back(cell.second); } temp = post_legalise_temp; - diameter *= post_legalise_dia_scale; + diameter = std::min(M, diameter * post_legalise_dia_scale); ctx->shuffle(autoplaced); // Legalisation is a big change so force a slack redistribution here @@ -435,7 +440,7 @@ class SAPlacer delta += (cfg.constraintWeight / 
temp) * (new_dist - old_dist) / last_wirelen_cost; n_move++; // SA acceptance criterea - if (delta < 0 || (temp > 1e-6 && (ctx->rng() / float(0x0fffffff)) <= std::exp(-100 * delta / temp))) { + if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { n_accept++; } else { if (other_cell != nullptr) @@ -676,7 +681,7 @@ class SAPlacer NetCriticalityMap net_crit; Context *ctx; - float temp = 1000; + float temp = 10; float crit_exp = 8; float lambda = 0.5; bool improved = false; @@ -686,8 +691,8 @@ class SAPlacer std::vector>>> fast_bels; std::unordered_set locked_bels; bool require_legal = true; - const float legalise_temp = 1; - const float post_legalise_temp = 10; + const float legalise_temp = 0.00015; + const float post_legalise_temp = 0.0003; const float post_legalise_dia_scale = 1.5; Placer1Cfg cfg; }; From 23306c163f48250140ca770454cbe893630aad05 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 9 Dec 2018 13:48:50 +0000 Subject: [PATCH 08/59] placer1: Allow chain position swaps after legalisation Signed-off-by: David Shah --- common/place_common.cc | 4 +- common/placer1.cc | 116 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/common/place_common.cc b/common/place_common.cc index b3eb4267..0a7b29c7 100644 --- a/common/place_common.cc +++ b/common/place_common.cc @@ -304,7 +304,7 @@ class ConstraintLegaliseWorker // Set the strength to locked on all cells in chain void lockdown_chain(CellInfo *root) { - root->belStrength = STRENGTH_LOCKED; + root->belStrength = STRENGTH_STRONG; for (auto child : root->constr_children) lockdown_chain(child); } @@ -380,7 +380,7 @@ class ConstraintLegaliseWorker rippedCells.insert(confl_cell->name); } } - ctx->bindBel(target, ctx->cells.at(cp.first).get(), STRENGTH_LOCKED); + ctx->bindBel(target, ctx->cells.at(cp.first).get(), STRENGTH_STRONG); rippedCells.erase(cp.first); } for (auto cp : solution) { diff --git a/common/placer1.cc 
b/common/placer1.cc index 9b8b4de0..bf8ccd09 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -24,6 +24,7 @@ #include "placer1.h" #include #include +#include #include #include #include @@ -42,6 +43,8 @@ #include "place_common.h" #include "timing.h" #include "util.h" + + namespace std { template <> struct hash> { @@ -152,6 +155,8 @@ class SAPlacer // Sort to-place cells for deterministic initial placement std::vector autoplaced; + std::vector chain_basis; + for (auto &cell : ctx->cells) { CellInfo *ci = cell.second.get(); if (ci->bel == BelId()) { @@ -219,6 +224,13 @@ class SAPlacer if (try_bel != BelId() && try_bel != cell->bel) try_swap_position(cell, try_bel); } + // Also try swapping chains, if applicable + for (auto cb : chain_basis) { + Loc chain_base_loc = ctx->getBelLocation(cb->bel); + BelId try_base = random_bel_for_cell(cb, chain_base_loc.z); + if (try_base != BelId() && try_base != cb->bel) + try_swap_chain(cb, try_base); + } } if (curr_wirelen_cost < min_wirelen) { @@ -276,8 +288,11 @@ class SAPlacer if (legalise_relative_constraints(ctx)) { // Only increase temperature if something was moved autoplaced.clear(); + chain_basis.clear(); for (auto cell : sorted(ctx->cells)) { - if (cell.second->belStrength < STRENGTH_STRONG) + if (cell.second->belStrength <= STRENGTH_STRONG && cell.second->constr_parent == nullptr && !cell.second->constr_children.empty()) + chain_basis.push_back(cell.second); + else if (cell.second->belStrength < STRENGTH_STRONG) autoplaced.push_back(cell.second); } temp = post_legalise_temp; @@ -458,9 +473,101 @@ class SAPlacer return false; } + inline bool is_constrained(CellInfo *cell) { + return cell->constr_parent != nullptr || !cell->constr_children.empty(); + } + + // Swap the Bel of a cell with another, return the original location + BelId swap_cell_bels(CellInfo *cell, BelId newBel) { + BelId oldBel = cell->bel; + CellInfo *bound = ctx->getBoundBelCell(newBel); + if (bound != nullptr) + ctx->unbindBel(newBel); + 
ctx->unbindBel(oldBel); + ctx->bindBel(newBel, cell, is_constrained(cell) ? STRENGTH_STRONG : STRENGTH_WEAK); + if (bound != nullptr) + ctx->bindBel(oldBel, bound, is_constrained(bound) ? STRENGTH_STRONG : STRENGTH_WEAK); + return oldBel; + } + + // Discover the relative positions of all cells in a chain + void discover_chain(Loc baseLoc, CellInfo *cell, std::vector> &cell_rel) { + Loc cellLoc = ctx->getBelLocation(cell->bel); + Loc rel{cellLoc.x - baseLoc.x, cellLoc.y - baseLoc.y, cellLoc.z}; + cell_rel.emplace_back(std::make_pair(cell, rel)); + for (auto child : cell->constr_children) + discover_chain(baseLoc, child, cell_rel); + } + + // Attempt to swap a chain with a non-chain + bool try_swap_chain(CellInfo *cell, BelId newBase) { + std::vector> cell_rel; + std::unordered_set cells; + std::vector> moves_made; + std::vector> dest_bels; + double delta = 0; + moveChange.reset(); + log_info("finding cells for chain swap %s\n", cell->name.c_str(ctx)); + + Loc baseLoc = ctx->getBelLocation(cell->bel); + discover_chain(baseLoc, cell, cell_rel); + Loc newBaseLoc = ctx->getBelLocation(newBase); + NPNR_ASSERT(newBaseLoc.z == baseLoc.z); + for (const auto &cr : cell_rel) + cells.insert(cr.first->name); + + for (const auto &cr : cell_rel) { + Loc targetLoc = {newBaseLoc.x + cr.second.x, newBaseLoc.y + cr.second.y, cr.second.z}; + BelId targetBel = ctx->getBelByLocation(targetLoc); + if (targetBel == BelId()) + return false; + if (ctx->getBelType(targetBel) != cell->type) + return false; + CellInfo *bound = ctx->getBoundBelCell(targetBel); + // We don't consider swapping chains with other chains, at least for the time being - unless it is + // part of this chain + if (bound != nullptr && !cells.count(bound->name) && (bound->belStrength >= STRENGTH_STRONG || is_constrained(bound))) + return false; + dest_bels.emplace_back(std::make_pair(cr.first, targetBel)); + } + log_info("trying chain swap %s\n", cell->name.c_str(ctx)); + // + for (const auto &db : dest_bels) { + BelId 
oldBel = swap_cell_bels(db.first, db.second); + moves_made.emplace_back(std::make_pair(db.first, oldBel)); + } + for (const auto &mm : moves_made) { + if (!ctx->isBelLocationValid(mm.first->bel)) + goto swap_fail; + if (!ctx->isBelLocationValid(mm.second)) + goto swap_fail; + add_move_cell(moveChange, mm.first, mm.second); + CellInfo *bound = ctx->getBoundBelCell(mm.second); + if (bound != nullptr) + add_move_cell(moveChange, bound, mm.first->bel); + } + compute_cost_changes(moveChange); + delta = lambda * (moveChange.timing_delta / last_timing_cost) + + (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); + n_move++; + // SA acceptance criterea + if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { + n_accept++; + log_info("accepted chain swap %s\n", cell->name.c_str(ctx)); + } else { + goto swap_fail; + } + commit_cost_changes(moveChange); + return true; +swap_fail: + for (const auto &entry : boost::adaptors::reverse(moves_made)) + swap_cell_bels(entry.first, entry.second); + return false; + } + // Find a random Bel of the correct type for a cell, within the specified // diameter - BelId random_bel_for_cell(CellInfo *cell) + BelId random_bel_for_cell(CellInfo *cell, int force_z = -1) { IdString targetType = cell->type; Loc curr_loc = ctx->getBelLocation(cell->bel); @@ -479,6 +586,11 @@ class SAPlacer if (fb.size() == 0) continue; BelId bel = fb.at(ctx->rng(int(fb.size()))); + if (force_z != -1) { + Loc loc = ctx->getBelLocation(bel); + if (loc.z != force_z) + continue; + } if (locked_bels.find(bel) != locked_bels.end()) continue; return bel; From c926b273ded6e6d805ae66d1f9c5be8a85156ca8 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 9 Dec 2018 13:57:41 +0000 Subject: [PATCH 09/59] placer1: Tweaks Signed-off-by: David Shah --- common/placer1.cc | 9 ++++++--- common/timing.cc | 3 +-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 
bf8ccd09..db10b7d3 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -507,7 +507,8 @@ class SAPlacer std::vector> dest_bels; double delta = 0; moveChange.reset(); - log_info("finding cells for chain swap %s\n", cell->name.c_str(ctx)); + if (ctx->debug) + log_info("finding cells for chain swap %s\n", cell->name.c_str(ctx)); Loc baseLoc = ctx->getBelLocation(cell->bel); discover_chain(baseLoc, cell, cell_rel); @@ -530,7 +531,8 @@ class SAPlacer return false; dest_bels.emplace_back(std::make_pair(cr.first, targetBel)); } - log_info("trying chain swap %s\n", cell->name.c_str(ctx)); + if (ctx->debug) + log_info("trying chain swap %s\n", cell->name.c_str(ctx)); // for (const auto &db : dest_bels) { BelId oldBel = swap_cell_bels(db.first, db.second); @@ -553,7 +555,8 @@ class SAPlacer // SA acceptance criterea if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { n_accept++; - log_info("accepted chain swap %s\n", cell->name.c_str(ctx)); + if (ctx->debug) + log_info("accepted chain swap %s\n", cell->name.c_str(ctx)); } else { goto swap_fail; } diff --git a/common/timing.cc b/common/timing.cc index 2a0af874..17adc078 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -611,8 +611,7 @@ struct Timing continue; delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; for (size_t i = 0; i < net->users.size(); i++) { - float criticality = - 1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax); + float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); nc.criticality.at(i) = std::min(1.0, std::max(0.0, criticality)); } nc.max_path_length = nd.max_path_length; From 222abb5be29a95538f37ed28760df68b037b2a52 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 9 Dec 2018 14:41:02 +0000 Subject: [PATCH 10/59] placer1: Encourage chain swaps Signed-off-by: David Shah --- common/placer1.cc | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/common/placer1.cc b/common/placer1.cc index db10b7d3..ecb61b5c 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -553,7 +553,7 @@ class SAPlacer (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); n_move++; // SA acceptance criterea - if (delta < 0 || (temp > 1e-8 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { + if (delta < 0 || (temp > 1e-9 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / (5 * temp)))) { n_accept++; if (ctx->debug) log_info("accepted chain swap %s\n", cell->name.c_str(ctx)); From 0cb351df52396dfa440ecebf6db148445f0afee2 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 10 Dec 2018 18:53:21 +0000 Subject: [PATCH 11/59] placer1: New temperature heuristic Signed-off-by: David Shah --- common/command.cc | 4 ++++ common/placer1.cc | 32 +++++++++++++------------------- common/placer1.h | 1 + 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/common/command.cc b/common/command.cc index dd351c0d..b7fc13e6 100644 --- a/common/command.cc +++ b/common/command.cc @@ -122,6 +122,7 @@ po::options_description CommandHandler::getGeneralOptions() general.add_options()("randomize-seed,r", "randomize seed value for random number generator"); general.add_options()("slack_redist_iter", po::value(), "number of iterations between slack redistribution"); general.add_options()("cstrweight", po::value(), "placer weighting for relative constraint satisfaction"); + general.add_options()("starttemp", po::value(), "placer SA start temperature"); general.add_options()("placer-budgets", "use budget rather than criticality in placer timing weights"); general.add_options()("pack-only", "pack design only without placement or routing"); @@ -188,6 +189,9 @@ void CommandHandler::setupContext(Context *ctx) if (vm.count("cstrweight")) { settings->set("placer1/constraintWeight", vm["cstrweight"].as()); } + if (vm.count("starttemp")) { + settings->set("placer1/startTemp", vm["starttemp"].as()); 
+ } if (vm.count("placer-budgets")) { settings->set("placer1/budgetBased", true); diff --git a/common/placer1.cc b/common/placer1.cc index ecb61b5c..f69cc500 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -202,7 +202,7 @@ class SAPlacer wirelen_t min_wirelen = curr_wirelen_cost; int n_no_progress = 0; - temp = 1; + temp = cfg.startTemp; // Main simulated annealing loop for (int iter = 1;; iter++) { @@ -255,8 +255,6 @@ class SAPlacer int M = std::max(max_x, max_y) + 1; - double upper = 0.6, lower = 0.4; - if (ctx->verbose) log("iter #%d: temp = %f, timing cost = " "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n", @@ -265,21 +263,16 @@ class SAPlacer if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost; } else { - if (Raccept >= 0.8) { - temp *= 0.7; - } else if (Raccept > upper) { - if (diameter < M) - diameter++; - else - temp *= 0.9; - } else if (Raccept > lower) { + double diam_next = diameter * (1.0 - 0.44 + Raccept); + diameter = std::max(1, std::min(M, int(diam_next + 0.5))); + if (Raccept > 0.96) { + temp *= 0.5; + } else if (Raccept > 0.8) { + temp *= 0.9; + } else if (Raccept > 0.15 && diameter > 1) { temp *= 0.95; } else { - // Raccept < 0.3 - if (diameter > 1) - diameter--; - else - temp *= 0.8; + temp *= 0.8; } } // Once cooled below legalise threshold, run legalisation and start requiring @@ -553,7 +546,7 @@ class SAPlacer (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); n_move++; // SA acceptance criterea - if (delta < 0 || (temp > 1e-9 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / (5 * temp)))) { + if (delta < 0 || (temp > 1e-9 && (ctx->rng() / float(0x3fffffff)) <= std::exp(-delta / temp))) { n_accept++; if (ctx->debug) log_info("accepted chain swap %s\n", cell->name.c_str(ctx)); @@ -806,8 +799,8 @@ swap_fail: std::vector>>> fast_bels; std::unordered_set locked_bels; bool require_legal = true; - const float legalise_temp = 0.00015; - 
const float post_legalise_temp = 0.0003; + const float legalise_temp = 0.001; + const float post_legalise_temp = 0.002; const float post_legalise_dia_scale = 1.5; Placer1Cfg cfg; }; @@ -817,6 +810,7 @@ Placer1Cfg::Placer1Cfg(Context *ctx) : Settings(ctx) constraintWeight = get("placer1/constraintWeight", 10); minBelsForGridPick = get("placer1/minBelsForGridPick", 64); budgetBased = get("placer1/budgetBased", false); + startTemp = get ("placer1/startTemp", 1); } bool placer1(Context *ctx, Placer1Cfg cfg) diff --git a/common/placer1.h b/common/placer1.h index 2c3808f0..aafc840c 100644 --- a/common/placer1.h +++ b/common/placer1.h @@ -30,6 +30,7 @@ struct Placer1Cfg : public Settings float constraintWeight; int minBelsForGridPick; bool budgetBased; + float startTemp; }; extern bool placer1(Context *ctx, Placer1Cfg cfg); From ade72de02faf72c2458b10dc94adf2326cc5c759 Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 13 Dec 2018 13:15:12 +0000 Subject: [PATCH 12/59] placer1: Optimise for performance Signed-off-by: David Shah --- common/placer1.cc | 78 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index f69cc500..dca9089a 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -102,10 +102,28 @@ class SAPlacer } diameter = std::max(max_x, max_y) + 1; + net_bounds.resize(ctx->nets.size()); + net_arc_tcost.resize(ctx->nets.size()); + moveChange.already_bounds_changed.resize(ctx->nets.size()); + moveChange.already_changed_arcs.resize(ctx->nets.size()); + old_udata.reserve(ctx->nets.size()); + net_by_udata.reserve(ctx->nets.size()); + decltype(NetInfo::udata) n = 0; + for (auto &net : ctx->nets) { + old_udata.emplace_back(net.second->udata); + net_arc_tcost.at(n).resize(net.second->users.size()); + moveChange.already_changed_arcs.at(n).resize(net.second->users.size()); + net.second->udata = n++; + net_by_udata.push_back(net.second.get()); + } + build_port_index(); } - 
~SAPlacer() {} + ~SAPlacer() { + for (auto &net : ctx->nets) + net.second->udata = old_udata[net.second->udata]; + } bool place() { @@ -651,10 +669,9 @@ swap_fail: NetInfo *ni = net.second; if (ignore_net(ni)) continue; - net_bounds[ni->name] = get_net_bounds(ni); - net_arc_tcost[ni->name].resize(ni->users.size()); + net_bounds[ni->udata] = get_net_bounds(ni); for (size_t i = 0; i < ni->users.size(); i++) - net_arc_tcost[ni->name][i] = get_timing_cost(ni, i); + net_arc_tcost[ni->udata][i] = get_timing_cost(ni, i); } } @@ -663,7 +680,7 @@ swap_fail: { wirelen_t cost = 0; for (const auto &net : net_bounds) - cost += net.second.hpwl(); + cost += net.hpwl(); return cost; } @@ -672,7 +689,7 @@ swap_fail: { double cost = 0; for (const auto &net : net_arc_tcost) { - for (auto arc_cost : net.second) { + for (auto arc_cost : net) { cost += arc_cost; } } @@ -682,17 +699,24 @@ swap_fail: // Cost-change-related data for a move struct MoveChangeData { - std::unordered_set bounds_changed_nets; - std::unordered_set> changed_arcs; + std::vector bounds_changed_nets; + std::vector> changed_arcs; - std::unordered_map new_net_bounds; - std::unordered_map, double> new_arc_costs; + std::vector already_bounds_changed; + std::vector> already_changed_arcs; + + std::vector> new_net_bounds; + std::vector, double>> new_arc_costs; wirelen_t wirelen_delta = 0; double timing_delta = 0; void reset() { + for (auto bc : bounds_changed_nets) + already_bounds_changed[bc] = false; + for (const auto &tc : changed_arcs) + already_changed_arcs[tc.first][tc.second] = false; bounds_changed_nets.clear(); changed_arcs.clear(); new_net_bounds.clear(); @@ -714,20 +738,30 @@ swap_fail: continue; if (ignore_net(pn)) continue; - const BoundingBox &curr_bounds = net_bounds[pn->name]; + const BoundingBox &curr_bounds = net_bounds[pn->udata]; // If the old location was at the edge of the bounds, or the new location exceeds the bounds, // an update is needed if (curr_bounds.touches_bounds(old_loc.x, old_loc.y) || 
!curr_bounds.is_inside_inc(curr_loc.x, curr_loc.y)) - mc.bounds_changed_nets.insert(pn->name); + if (!mc.already_bounds_changed[pn->udata]) { + mc.bounds_changed_nets.push_back(pn->udata); + mc.already_bounds_changed[pn->udata] = true; + } // Output ports - all arcs change timing if (port.second.type == PORT_OUT) { int cc; TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); if (cls != TMG_IGNORE) for (size_t i = 0; i < pn->users.size(); i++) - mc.changed_arcs.insert(std::make_pair(pn->name, i)); + if (!mc.already_changed_arcs[pn->udata][i]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, i)); + mc.already_changed_arcs[pn->udata][i] = true; + } } else if (port.second.type == PORT_IN) { - mc.changed_arcs.insert(std::make_pair(pn->name, fast_port_to_user.at(&port.second))); + auto usr = fast_port_to_user.at(&port.second); + if (!mc.already_changed_arcs[pn->udata][usr]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr)); + mc.already_changed_arcs[pn->udata][usr] = true; + } } } } @@ -736,16 +770,18 @@ swap_fail: { for (const auto &bc : md.bounds_changed_nets) { wirelen_t old_hpwl = net_bounds.at(bc).hpwl(); - auto bounds = get_net_bounds(ctx->nets.at(bc).get()); - md.new_net_bounds[bc] = bounds; + auto bounds = get_net_bounds(net_by_udata.at(bc)); + md.new_net_bounds.emplace_back(std::make_pair(bc, bounds)); md.wirelen_delta += (bounds.hpwl() - old_hpwl); + md.already_bounds_changed[bc] = false; } for (const auto &tc : md.changed_arcs) { double old_cost = net_arc_tcost.at(tc.first).at(tc.second); - double new_cost = get_timing_cost(ctx->nets.at(tc.first).get(), tc.second); - md.new_arc_costs[tc] = new_cost; + double new_cost = get_timing_cost(net_by_udata.at(tc.first), tc.second); + md.new_arc_costs.emplace_back(std::make_pair(tc, new_cost)); md.timing_delta += (new_cost - old_cost); + md.already_changed_arcs[tc.first][tc.second] = false; } } @@ -774,9 +810,9 @@ swap_fail: inline double curr_metric() { return lambda * 
curr_timing_cost + (1 - lambda) * curr_wirelen_cost; } // Map nets to their bounding box (so we can skip recompute for moves that do not exceed the bounds - std::unordered_map net_bounds; + std::vector net_bounds; // Map net arcs to their timing cost (criticality * delay ns) - std::unordered_map> net_arc_tcost; + std::vector> net_arc_tcost; // Fast lookup for cell port to net user index std::unordered_map fast_port_to_user; @@ -798,6 +834,8 @@ swap_fail: std::unordered_map> bel_types; std::vector>>> fast_bels; std::unordered_set locked_bels; + std::vector net_by_udata; + std::vector old_udata; bool require_legal = true; const float legalise_temp = 0.001; const float post_legalise_temp = 0.002; From ae33ff397f5bc97d047639a7fb76d5d888050cb2 Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 13 Dec 2018 14:27:33 +0000 Subject: [PATCH 13/59] placer1: Consider regions during placement Signed-off-by: David Shah --- common/place_common.cc | 8 ++++++++ common/place_common.h | 4 ++++ common/placer1.cc | 42 ++++++++++++++++++++++++++---------------- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/common/place_common.cc b/common/place_common.cc index 0a7b29c7..73a320d0 100644 --- a/common/place_common.cc +++ b/common/place_common.cc @@ -529,4 +529,12 @@ int get_constraints_distance(const Context *ctx, const CellInfo *cell) return dist; } +bool check_cell_bel_region(const CellInfo *cell, BelId bel) +{ + if (cell->region != nullptr && cell->region->constr_bels && !cell->region->bels.count(bel)) + return false; + else + return true; +} + NEXTPNR_NAMESPACE_END diff --git a/common/place_common.h b/common/place_common.h index 79dec067..fa5ce4c2 100644 --- a/common/place_common.h +++ b/common/place_common.h @@ -49,6 +49,10 @@ bool legalise_relative_constraints(Context *ctx); // Get the total distance from satisfied constraints for a cell int get_constraints_distance(const Context *ctx, const CellInfo *cell); + +// Check that a Bel is within the region for a cell 
+bool check_cell_bel_region(const CellInfo *cell, BelId bel); + NEXTPNR_NAMESPACE_END #endif diff --git a/common/placer1.cc b/common/placer1.cc index dca9089a..416c0d31 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -44,7 +44,6 @@ #include "timing.h" #include "util.h" - namespace std { template <> struct hash> { @@ -120,7 +119,8 @@ class SAPlacer build_port_index(); } - ~SAPlacer() { + ~SAPlacer() + { for (auto &net : ctx->nets) net.second->udata = old_udata[net.second->udata]; } @@ -275,8 +275,8 @@ class SAPlacer if (ctx->verbose) log("iter #%d: temp = %f, timing cost = " - "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n", - iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), diameter, Raccept); + "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n", + iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), diameter, Raccept); if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost; @@ -301,7 +301,8 @@ class SAPlacer autoplaced.clear(); chain_basis.clear(); for (auto cell : sorted(ctx->cells)) { - if (cell.second->belStrength <= STRENGTH_STRONG && cell.second->constr_parent == nullptr && !cell.second->constr_children.empty()) + if (cell.second->belStrength <= STRENGTH_STRONG && cell.second->constr_parent == nullptr && + !cell.second->constr_children.empty()) chain_basis.push_back(cell.second); else if (cell.second->belStrength < STRENGTH_STRONG) autoplaced.push_back(cell.second); @@ -484,12 +485,14 @@ class SAPlacer return false; } - inline bool is_constrained(CellInfo *cell) { + inline bool is_constrained(CellInfo *cell) + { return cell->constr_parent != nullptr || !cell->constr_children.empty(); } // Swap the Bel of a cell with another, return the original location - BelId swap_cell_bels(CellInfo *cell, BelId newBel) { + BelId swap_cell_bels(CellInfo *cell, BelId newBel) + { BelId oldBel = cell->bel; CellInfo *bound = ctx->getBoundBelCell(newBel); if (bound != 
nullptr) @@ -502,7 +505,8 @@ class SAPlacer } // Discover the relative positions of all cells in a chain - void discover_chain(Loc baseLoc, CellInfo *cell, std::vector> &cell_rel) { + void discover_chain(Loc baseLoc, CellInfo *cell, std::vector> &cell_rel) + { Loc cellLoc = ctx->getBelLocation(cell->bel); Loc rel{cellLoc.x - baseLoc.x, cellLoc.y - baseLoc.y, cellLoc.z}; cell_rel.emplace_back(std::make_pair(cell, rel)); @@ -511,10 +515,11 @@ class SAPlacer } // Attempt to swap a chain with a non-chain - bool try_swap_chain(CellInfo *cell, BelId newBase) { + bool try_swap_chain(CellInfo *cell, BelId newBase) + { std::vector> cell_rel; std::unordered_set cells; - std::vector> moves_made; + std::vector> moves_made; std::vector> dest_bels; double delta = 0; moveChange.reset(); @@ -538,7 +543,8 @@ class SAPlacer CellInfo *bound = ctx->getBoundBelCell(targetBel); // We don't consider swapping chains with other chains, at least for the time being - unless it is // part of this chain - if (bound != nullptr && !cells.count(bound->name) && (bound->belStrength >= STRENGTH_STRONG || is_constrained(bound))) + if (bound != nullptr && !cells.count(bound->name) && + (bound->belStrength >= STRENGTH_STRONG || is_constrained(bound))) return false; dest_bels.emplace_back(std::make_pair(cr.first, targetBel)); } @@ -550,12 +556,14 @@ class SAPlacer moves_made.emplace_back(std::make_pair(db.first, oldBel)); } for (const auto &mm : moves_made) { - if (!ctx->isBelLocationValid(mm.first->bel)) + if (!ctx->isBelLocationValid(mm.first->bel) || !check_cell_bel_region(mm.first, mm.first->bel)) goto swap_fail; if (!ctx->isBelLocationValid(mm.second)) goto swap_fail; - add_move_cell(moveChange, mm.first, mm.second); CellInfo *bound = ctx->getBoundBelCell(mm.second); + if (bound && !check_cell_bel_region(bound, bound->bel)) + goto swap_fail; + add_move_cell(moveChange, mm.first, mm.second); if (bound != nullptr) add_move_cell(moveChange, bound, mm.first->bel); } @@ -573,7 +581,7 @@ class SAPlacer } 
commit_cost_changes(moveChange); return true; -swap_fail: + swap_fail: for (const auto &entry : boost::adaptors::reverse(moves_made)) swap_cell_bels(entry.first, entry.second); return false; @@ -605,6 +613,8 @@ swap_fail: if (loc.z != force_z) continue; } + if (!check_cell_bel_region(cell, bel)) + continue; if (locked_bels.find(bel) != locked_bels.end()) continue; return bel; @@ -834,7 +844,7 @@ swap_fail: std::unordered_map> bel_types; std::vector>>> fast_bels; std::unordered_set locked_bels; - std::vector net_by_udata; + std::vector net_by_udata; std::vector old_udata; bool require_legal = true; const float legalise_temp = 0.001; @@ -848,7 +858,7 @@ Placer1Cfg::Placer1Cfg(Context *ctx) : Settings(ctx) constraintWeight = get("placer1/constraintWeight", 10); minBelsForGridPick = get("placer1/minBelsForGridPick", 64); budgetBased = get("placer1/budgetBased", false); - startTemp = get ("placer1/startTemp", 1); + startTemp = get("placer1/startTemp", 1); } bool placer1(Context *ctx, Placer1Cfg cfg) From 493d6c3fb93fb7ffe96609ded9e392b327b2c86c Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 14 Dec 2018 12:16:29 +0000 Subject: [PATCH 14/59] Add Python helper functions for floorplanning Signed-off-by: David Shah --- common/nextpnr.cc | 21 +++++++++++++++++++++ common/nextpnr.h | 3 +++ common/pybindings.cc | 18 ++++++++++++++++++ common/pywrappers.h | 26 +++++++++++++++++++++++++- ecp5/arch_pybindings.cc | 7 +++++++ ice40/arch_pybindings.cc | 7 +++++++ 6 files changed, 81 insertions(+), 1 deletion(-) diff --git a/common/nextpnr.cc b/common/nextpnr.cc index bb941d3d..b0cbbbeb 100644 --- a/common/nextpnr.cc +++ b/common/nextpnr.cc @@ -421,4 +421,25 @@ void BaseCtx::addClock(IdString net, float freq) } } +void BaseCtx::createRectangularRegion(IdString name, int x0, int y0, int x1, int y1) +{ + std::unique_ptr new_region(new Region()); + new_region->name = name; + new_region->constr_bels = true; + new_region->constr_pips = false; + new_region->constr_wires = false; + for 
(int x = x0; x <= x1; x++) { + for (int y = y0; y <= y1; y++) { + for (auto bel : getCtx()->getBelsByTile(x, y)) + new_region->bels.insert(bel); + } + } + region[name] = std::move(new_region); +} +void BaseCtx::addBelToRegion(IdString name, BelId bel) { region[name]->bels.insert(bel); } +void BaseCtx::constrainCellToRegion(IdString cell, IdString region_name) +{ + cells[cell]->region = region[region_name].get(); +} + NEXTPNR_NAMESPACE_END diff --git a/common/nextpnr.h b/common/nextpnr.h index d58ae529..5967ecee 100644 --- a/common/nextpnr.h +++ b/common/nextpnr.h @@ -637,6 +637,9 @@ struct BaseCtx // Intended to simplify Python API void addClock(IdString net, float freq); + void createRectangularRegion(IdString name, int x0, int y0, int x1, int y1); + void addBelToRegion(IdString name, BelId bel); + void constrainCellToRegion(IdString cell, IdString region_name); }; NEXTPNR_NAMESPACE_END diff --git a/common/pybindings.cc b/common/pybindings.cc index 6cae889d..eee78b5e 100644 --- a/common/pybindings.cc +++ b/common/pybindings.cc @@ -104,6 +104,7 @@ BOOST_PYTHON_MODULE(MODULE_NAME) typedef std::unordered_map AttrMap; typedef std::unordered_map PortMap; typedef std::unordered_map PinMap; + typedef std::unordered_map> RegionMap; class_("BaseCtx", no_init); @@ -135,6 +136,8 @@ BOOST_PYTHON_MODULE(MODULE_NAME) typedef std::vector PortRefVector; typedef std::unordered_map WireMap; + typedef std::unordered_set BelSet; + typedef std::unordered_set WireSet; auto ni_cls = class_>("NetInfo", no_init); readwrite_wrapper, @@ -163,10 +166,25 @@ BOOST_PYTHON_MODULE(MODULE_NAME) def("parse_json", parse_json_shim); def("load_design", load_design_shim, return_value_policy()); + auto region_cls = class_>("Region", no_init); + readwrite_wrapper, + conv_from_str>::def_wrap(region_cls, "name"); + readwrite_wrapper, + pass_through>::def_wrap(region_cls, "constr_bels"); + readwrite_wrapper, + pass_through>::def_wrap(region_cls, "constr_bels"); + readwrite_wrapper, + 
pass_through>::def_wrap(region_cls, "constr_pips"); + readonly_wrapper>::def_wrap(region_cls, + "bels"); + readonly_wrapper>::def_wrap(region_cls, + "wires"); + WRAP_MAP(AttrMap, pass_through, "AttrMap"); WRAP_MAP(PortMap, wrap_context, "PortMap"); WRAP_MAP(PinMap, conv_to_str, "PinMap"); WRAP_MAP(WireMap, wrap_context, "WireMap"); + WRAP_MAP_UPTR(RegionMap, "RegionMap"); WRAP_VECTOR(PortRefVector, wrap_context); diff --git a/common/pywrappers.h b/common/pywrappers.h index 4e463afd..427c3623 100644 --- a/common/pywrappers.h +++ b/common/pywrappers.h @@ -269,7 +269,7 @@ template static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } }; -// Three parameters, one return +// Three parameters, no return template struct fn_wrapper_3a_v { @@ -288,6 +288,30 @@ struct fn_wrapper_3a_v template static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } }; +// Five parameters, no return +template +struct fn_wrapper_5a_v +{ + using class_type = typename WrapIfNotContext::maybe_wrapped_t; + using conv_arg1_type = typename arg1_conv::arg_type; + using conv_arg2_type = typename arg2_conv::arg_type; + using conv_arg3_type = typename arg3_conv::arg_type; + using conv_arg4_type = typename arg4_conv::arg_type; + using conv_arg5_type = typename arg5_conv::arg_type; + + static void wrapped_fn(class_type &cls, conv_arg1_type arg1, conv_arg2_type arg2, conv_arg3_type arg3, + conv_arg4_type arg4, conv_arg5_type arg5) + { + Context *ctx = get_ctx(cls); + Class &base = get_base(cls); + return (base.*fn)(arg1_conv()(ctx, arg1), arg2_conv()(ctx, arg2), arg3_conv()(ctx, arg3), + arg4_conv()(ctx, arg4), arg5_conv()(ctx, arg5)); + } + + template static void def_wrap(WrapCls cls_, const char *name) { cls_.def(name, wrapped_fn); } +}; + // Wrapped getter template struct readonly_wrapper { diff --git a/ecp5/arch_pybindings.cc b/ecp5/arch_pybindings.cc index 5e73a673..18d21112 100644 --- a/ecp5/arch_pybindings.cc +++ b/ecp5/arch_pybindings.cc @@ 
-133,6 +133,13 @@ void arch_wrap_python() fn_wrapper_2a_v, pass_through>::def_wrap(ctx_cls, "addClock"); + fn_wrapper_5a_v, pass_through, pass_through, pass_through, + pass_through>::def_wrap(ctx_cls, "createRectangularRegion"); + fn_wrapper_2a_v, + conv_from_str>::def_wrap(ctx_cls, "addBelToRegion"); + fn_wrapper_2a_v, conv_from_str>::def_wrap(ctx_cls, "constrainCellToRegion"); WRAP_RANGE(Bel, conv_to_str); WRAP_RANGE(Wire, conv_to_str); diff --git a/ice40/arch_pybindings.cc b/ice40/arch_pybindings.cc index f0ca584b..bc0bfb84 100644 --- a/ice40/arch_pybindings.cc +++ b/ice40/arch_pybindings.cc @@ -144,6 +144,13 @@ void arch_wrap_python() fn_wrapper_2a_v, pass_through>::def_wrap(ctx_cls, "addClock"); + fn_wrapper_5a_v, pass_through, pass_through, pass_through, + pass_through>::def_wrap(ctx_cls, "createRectangularRegion"); + fn_wrapper_2a_v, + conv_from_str>::def_wrap(ctx_cls, "addBelToRegion"); + fn_wrapper_2a_v, conv_from_str>::def_wrap(ctx_cls, "constrainCellToRegion"); WRAP_RANGE(Bel, conv_to_str); WRAP_RANGE(Wire, conv_to_str); From a8289b699fd12305abca6df52f20d4e80c834122 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 14 Dec 2018 13:11:02 +0000 Subject: [PATCH 15/59] pycontainers: Implement __contains__ Signed-off-by: David Shah --- common/pycontainers.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/common/pycontainers.h b/common/pycontainers.h index 70f69c51..5de2f6d2 100644 --- a/common/pycontainers.h +++ b/common/pycontainers.h @@ -345,6 +345,12 @@ template struct map_wrapper std::terminate(); } + static bool contains(wrapped_map &x, std::string const &i) + { + K k = PythonConversion::string_converter().from_str(x.ctx, i); + return x.base.count(k); + } + static void wrap(const char *map_name, const char *kv_name, const char *kv_iter_name, const char *iter_name) { map_pair_wrapper::wrap(kv_name, kv_iter_name); @@ -353,6 +359,7 @@ template struct map_wrapper class_(map_name, no_init) .def("__iter__", rw::iter) .def("__len__", len) + 
.def("__contains__", contains) .def("__getitem__", get) .def("__setitem__", set, with_custodian_and_ward<1, 2>()); } @@ -465,6 +472,12 @@ template struct map_wrapper_uptr std::terminate(); } + static bool contains(wrapped_map &x, std::string const &i) + { + K k = PythonConversion::string_converter().from_str(x.ctx, i); + return x.base.count(k); + } + static void wrap(const char *map_name, const char *kv_name, const char *kv_iter_name, const char *iter_name) { map_pair_wrapper_uptr::wrap(kv_name, kv_iter_name); @@ -473,6 +486,7 @@ template struct map_wrapper_uptr class_(map_name, no_init) .def("__iter__", rw::iter) .def("__len__", len) + .def("__contains__", contains) .def("__getitem__", get) .def("__setitem__", set, with_custodian_and_ward<1, 2>()); } From 0118ac00c45c725a42b16697d1ac975f1be6f3b9 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 14 Dec 2018 13:40:29 +0000 Subject: [PATCH 16/59] placer1: Improve handling of regions Signed-off-by: David Shah --- common/placer1.cc | 57 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 416c0d31..e4fa0320 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -115,7 +115,29 @@ class SAPlacer net.second->udata = n++; net_by_udata.push_back(net.second.get()); } - + for (auto ®ion : sorted(ctx->region)) { + Region *r = region.second; + BoundingBox bb; + if (r->constr_bels) { + bb.x0 = std::numeric_limits::max(); + bb.x1 = std::numeric_limits::min(); + bb.y0 = std::numeric_limits::max(); + bb.y1 = std::numeric_limits::min(); + for (auto bel : r->bels) { + Loc loc = ctx->getBelLocation(bel); + bb.x0 = std::min(bb.x0, loc.x); + bb.x1 = std::max(bb.x1, loc.x); + bb.y0 = std::min(bb.y0, loc.y); + bb.y1 = std::max(bb.y1, loc.y); + } + } else { + bb.x0 = 0; + bb.y0 = 0; + bb.x1 = max_x; + bb.y1 = max_y; + } + region_bounds[r->name] = bb; + } build_port_index(); } @@ -382,7 +404,8 @@ class SAPlacer 
ctx->unbindBel(cell->bel); } IdString targetType = cell->type; - for (auto bel : ctx->getBels()) { + + auto proc_bel = [&](BelId bel) { if (ctx->getBelType(bel) == targetType && ctx->isValidBelForCell(cell, bel)) { if (ctx->checkBelAvail(bel)) { uint64_t score = ctx->rng64(); @@ -400,7 +423,18 @@ class SAPlacer } } } + }; + + if (cell->region != nullptr && cell->region->constr_bels) { + for (auto bel : cell->region->bels) { + proc_bel(bel); + } + } else { + for (auto bel : ctx->getBels()) { + proc_bel(bel); + } } + if (best_bel == BelId()) { if (iters == 0 || ripup_bel == BelId()) log_error("failed to place cell '%s' of type '%s'\n", cell->name.c_str(ctx), cell->type.c_str(ctx)); @@ -593,9 +627,22 @@ class SAPlacer { IdString targetType = cell->type; Loc curr_loc = ctx->getBelLocation(cell->bel); + int count = 0; + + int dx = diameter, dy = diameter; + if (cell->region != nullptr && cell->region->constr_bels) { + dx = std::min(diameter, (region_bounds[cell->region->name].x1 - region_bounds[cell->region->name].x0) + 1); + dy = std::min(diameter, (region_bounds[cell->region->name].y1 - region_bounds[cell->region->name].y0) + 1); + // Clamp location to within bounds + curr_loc.x = std::max(region_bounds[cell->region->name].x0, curr_loc.x); + curr_loc.x = std::min(region_bounds[cell->region->name].x1, curr_loc.x); + curr_loc.y = std::max(region_bounds[cell->region->name].y0, curr_loc.y); + curr_loc.y = std::min(region_bounds[cell->region->name].y1, curr_loc.y); + } + while (true) { - int nx = ctx->rng(2 * diameter + 1) + std::max(curr_loc.x - diameter, 0); - int ny = ctx->rng(2 * diameter + 1) + std::max(curr_loc.y - diameter, 0); + int nx = ctx->rng(2 * dx + 1) + std::max(curr_loc.x - dx, 0); + int ny = ctx->rng(2 * dy + 1) + std::max(curr_loc.y - dy, 0); int beltype_idx, beltype_cnt; std::tie(beltype_idx, beltype_cnt) = bel_types.at(targetType); if (beltype_cnt < cfg.minBelsForGridPick) @@ -617,6 +664,7 @@ class SAPlacer continue; if (locked_bels.find(bel) != 
locked_bels.end()) continue; + count++; return bel; } } @@ -842,6 +890,7 @@ class SAPlacer int n_move, n_accept; int diameter = 35, max_x = 1, max_y = 1; std::unordered_map> bel_types; + std::unordered_map region_bounds; std::vector>>> fast_bels; std::unordered_set locked_bels; std::vector net_by_udata; From 1780f42b9a7854a8a7bf1f2d6589d3d35f133f87 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 14 Dec 2018 13:41:28 +0000 Subject: [PATCH 17/59] ice40: Add examples folder including floorplan example Signed-off-by: David Shah --- ice40/{ => examples/blinky}/blinky.pcf | 0 ice40/{ => examples/blinky}/blinky.proj | 0 ice40/{ => examples/blinky}/blinky.sh | 0 ice40/{ => examples/blinky}/blinky.v | 0 ice40/{ => examples/blinky}/blinky.ys | 0 ice40/{ => examples/blinky}/blinky_tb.v | 0 ice40/examples/floorplan/.gitignore | 4 ++++ ice40/examples/floorplan/floorplan.py | 5 +++++ ice40/examples/floorplan/floorplan.sh | 6 ++++++ ice40/examples/floorplan/floorplan.v | 22 ++++++++++++++++++++++ ice40/examples/floorplan/icebreaker.pcf | 5 +++++ 11 files changed, 42 insertions(+) rename ice40/{ => examples/blinky}/blinky.pcf (100%) rename ice40/{ => examples/blinky}/blinky.proj (100%) rename ice40/{ => examples/blinky}/blinky.sh (100%) rename ice40/{ => examples/blinky}/blinky.v (100%) rename ice40/{ => examples/blinky}/blinky.ys (100%) rename ice40/{ => examples/blinky}/blinky_tb.v (100%) create mode 100644 ice40/examples/floorplan/.gitignore create mode 100644 ice40/examples/floorplan/floorplan.py create mode 100755 ice40/examples/floorplan/floorplan.sh create mode 100644 ice40/examples/floorplan/floorplan.v create mode 100644 ice40/examples/floorplan/icebreaker.pcf diff --git a/ice40/blinky.pcf b/ice40/examples/blinky/blinky.pcf similarity index 100% rename from ice40/blinky.pcf rename to ice40/examples/blinky/blinky.pcf diff --git a/ice40/blinky.proj b/ice40/examples/blinky/blinky.proj similarity index 100% rename from ice40/blinky.proj rename to 
ice40/examples/blinky/blinky.proj diff --git a/ice40/blinky.sh b/ice40/examples/blinky/blinky.sh similarity index 100% rename from ice40/blinky.sh rename to ice40/examples/blinky/blinky.sh diff --git a/ice40/blinky.v b/ice40/examples/blinky/blinky.v similarity index 100% rename from ice40/blinky.v rename to ice40/examples/blinky/blinky.v diff --git a/ice40/blinky.ys b/ice40/examples/blinky/blinky.ys similarity index 100% rename from ice40/blinky.ys rename to ice40/examples/blinky/blinky.ys diff --git a/ice40/blinky_tb.v b/ice40/examples/blinky/blinky_tb.v similarity index 100% rename from ice40/blinky_tb.v rename to ice40/examples/blinky/blinky_tb.v diff --git a/ice40/examples/floorplan/.gitignore b/ice40/examples/floorplan/.gitignore new file mode 100644 index 00000000..d93659be --- /dev/null +++ b/ice40/examples/floorplan/.gitignore @@ -0,0 +1,4 @@ +*.json +*.asc +*.bin +__pycache__ \ No newline at end of file diff --git a/ice40/examples/floorplan/floorplan.py b/ice40/examples/floorplan/floorplan.py new file mode 100644 index 00000000..85c53ccd --- /dev/null +++ b/ice40/examples/floorplan/floorplan.py @@ -0,0 +1,5 @@ +ctx.createRectangularRegion("osc", 1, 1, 1, 4) +for cell, cellinfo in ctx.cells: + if "ringosc" in cellinfo.attrs: + print("Floorplanned cell %s" % cell) + ctx.constrainCellToRegion(cell, "osc") diff --git a/ice40/examples/floorplan/floorplan.sh b/ice40/examples/floorplan/floorplan.sh new file mode 100755 index 00000000..e0ed7a64 --- /dev/null +++ b/ice40/examples/floorplan/floorplan.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -ex +yosys -p "synth_ice40 -top top -json floorplan.json" floorplan.v +../../../nextpnr-ice40 --up5k --json floorplan.json --pcf icebreaker.pcf --asc floorplan.asc --ignore-loops --pre-place floorplan.py +icepack floorplan.asc floorplan.bin +iceprog floorplan.bin diff --git a/ice40/examples/floorplan/floorplan.v b/ice40/examples/floorplan/floorplan.v new file mode 100644 index 00000000..8f99ed4e --- /dev/null +++ 
b/ice40/examples/floorplan/floorplan.v @@ -0,0 +1,22 @@ +module top(output LED1, LED2, LED3, LED4, LED5); + localparam N = 31; + wire [N:0] x; + assign x[0] = x[N]; + + genvar ii; + generate + + for (ii = 0; ii < N; ii = ii + 1) begin + (* ringosc *) + SB_LUT4 #(.LUT_INIT(1)) lut_i(.I0(x[ii]), .I1(), .I2(), .I3(), .O(x[ii+1])); + end + endgenerate + + assign clk = x[N]; + + + reg [19:0] ctr; + always @(posedge clk) + ctr <= ctr + 1'b1; + assign {LED5, LED4, LED3, LED2, LED1} = ctr[19:15]; +endmodule diff --git a/ice40/examples/floorplan/icebreaker.pcf b/ice40/examples/floorplan/icebreaker.pcf new file mode 100644 index 00000000..ac7ebf9e --- /dev/null +++ b/ice40/examples/floorplan/icebreaker.pcf @@ -0,0 +1,5 @@ +set_io -nowarn LED1 26 +set_io -nowarn LED2 27 +set_io -nowarn LED3 25 +set_io -nowarn LED4 23 +set_io -nowarn LED5 21 From bd0a33022baf78751052e1655557d2e42932504e Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 30 Dec 2018 10:38:51 +0100 Subject: [PATCH 18/59] placer1: Implement non-timing-driven mode Signed-off-by: David Shah --- common/placer1.cc | 57 ++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index e4fa0320..4f9d6a87 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -455,6 +455,7 @@ class SAPlacer // Attempt a SA position swap, return true on success or false on failure bool try_swap_position(CellInfo *cell, BelId newBel) { + static const double epsilon = 1e-20; moveChange.reset(); BelId oldBel = cell->bel; CellInfo *other_cell = ctx->getBoundBelCell(newBel); @@ -496,8 +497,8 @@ class SAPlacer new_dist = get_constraints_distance(ctx, cell); if (other_cell != nullptr) new_dist += get_constraints_distance(ctx, other_cell); - delta = lambda * (moveChange.timing_delta / last_timing_cost) + - (1 - lambda) * (double(moveChange.wirelen_delta) / last_wirelen_cost); + delta = lambda * (moveChange.timing_delta / std::max(last_timing_cost, 
epsilon)) + + (1 - lambda) * (double(moveChange.wirelen_delta) / std::max(last_wirelen_cost, epsilon)); delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost; n_move++; // SA acceptance criterea @@ -728,8 +729,9 @@ class SAPlacer if (ignore_net(ni)) continue; net_bounds[ni->udata] = get_net_bounds(ni); - for (size_t i = 0; i < ni->users.size(); i++) - net_arc_tcost[ni->udata][i] = get_timing_cost(ni, i); + if (ctx->timing_driven) + for (size_t i = 0; i < ni->users.size(); i++) + net_arc_tcost[ni->udata][i] = get_timing_cost(ni, i); } } @@ -804,21 +806,23 @@ class SAPlacer mc.bounds_changed_nets.push_back(pn->udata); mc.already_bounds_changed[pn->udata] = true; } - // Output ports - all arcs change timing - if (port.second.type == PORT_OUT) { - int cc; - TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); - if (cls != TMG_IGNORE) - for (size_t i = 0; i < pn->users.size(); i++) - if (!mc.already_changed_arcs[pn->udata][i]) { - mc.changed_arcs.emplace_back(std::make_pair(pn->udata, i)); - mc.already_changed_arcs[pn->udata][i] = true; - } - } else if (port.second.type == PORT_IN) { - auto usr = fast_port_to_user.at(&port.second); - if (!mc.already_changed_arcs[pn->udata][usr]) { - mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr)); - mc.already_changed_arcs[pn->udata][usr] = true; + if (ctx->timing_driven) { + // Output ports - all arcs change timing + if (port.second.type == PORT_OUT) { + int cc; + TimingPortClass cls = ctx->getPortTimingClass(cell, port.first, cc); + if (cls != TMG_IGNORE) + for (size_t i = 0; i < pn->users.size(); i++) + if (!mc.already_changed_arcs[pn->udata][i]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, i)); + mc.already_changed_arcs[pn->udata][i] = true; + } + } else if (port.second.type == PORT_IN) { + auto usr = fast_port_to_user.at(&port.second); + if (!mc.already_changed_arcs[pn->udata][usr]) { + mc.changed_arcs.emplace_back(std::make_pair(pn->udata, usr)); + 
mc.already_changed_arcs[pn->udata][usr] = true; + } } } } @@ -833,13 +837,14 @@ class SAPlacer md.wirelen_delta += (bounds.hpwl() - old_hpwl); md.already_bounds_changed[bc] = false; } - - for (const auto &tc : md.changed_arcs) { - double old_cost = net_arc_tcost.at(tc.first).at(tc.second); - double new_cost = get_timing_cost(net_by_udata.at(tc.first), tc.second); - md.new_arc_costs.emplace_back(std::make_pair(tc, new_cost)); - md.timing_delta += (new_cost - old_cost); - md.already_changed_arcs[tc.first][tc.second] = false; + if (ctx->timing_driven) { + for (const auto &tc : md.changed_arcs) { + double old_cost = net_arc_tcost.at(tc.first).at(tc.second); + double new_cost = get_timing_cost(net_by_udata.at(tc.first), tc.second); + md.new_arc_costs.emplace_back(std::make_pair(tc, new_cost)); + md.timing_delta += (new_cost - old_cost); + md.already_changed_arcs[tc.first][tc.second] = false; + } } } From 9cd5980d35ac7174ce577b89efac7f4d4a5f8ada Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 30 Dec 2018 10:53:13 +0100 Subject: [PATCH 19/59] placer1: Add (currently-unused) option for fanout threshold for timing cost calc Signed-off-by: David Shah --- common/placer1.cc | 7 ++++--- common/placer1.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 4f9d6a87..b180ebfb 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -498,7 +498,7 @@ class SAPlacer if (other_cell != nullptr) new_dist += get_constraints_distance(ctx, other_cell); delta = lambda * (moveChange.timing_delta / std::max(last_timing_cost, epsilon)) + - (1 - lambda) * (double(moveChange.wirelen_delta) / std::max(last_wirelen_cost, epsilon)); + (1 - lambda) * (double(moveChange.wirelen_delta) / std::max(last_wirelen_cost, epsilon)); delta += (cfg.constraintWeight / temp) * (new_dist - old_dist) / last_wirelen_cost; n_move++; // SA acceptance criterea @@ -729,7 +729,7 @@ class SAPlacer if (ignore_net(ni)) continue; net_bounds[ni->udata] = 
get_net_bounds(ni); - if (ctx->timing_driven) + if (ctx->timing_driven && int(ni->users.size()) < cfg.timingFanoutThresh) for (size_t i = 0; i < ni->users.size(); i++) net_arc_tcost[ni->udata][i] = get_timing_cost(ni, i); } @@ -806,7 +806,7 @@ class SAPlacer mc.bounds_changed_nets.push_back(pn->udata); mc.already_bounds_changed[pn->udata] = true; } - if (ctx->timing_driven) { + if (ctx->timing_driven && int(pn->users.size()) < cfg.timingFanoutThresh) { // Output ports - all arcs change timing if (port.second.type == PORT_OUT) { int cc; @@ -913,6 +913,7 @@ Placer1Cfg::Placer1Cfg(Context *ctx) : Settings(ctx) minBelsForGridPick = get("placer1/minBelsForGridPick", 64); budgetBased = get("placer1/budgetBased", false); startTemp = get("placer1/startTemp", 1); + timingFanoutThresh = std::numeric_limits::max(); } bool placer1(Context *ctx, Placer1Cfg cfg) diff --git a/common/placer1.h b/common/placer1.h index aafc840c..a0eabbb0 100644 --- a/common/placer1.h +++ b/common/placer1.h @@ -31,6 +31,7 @@ struct Placer1Cfg : public Settings int minBelsForGridPick; bool budgetBased; float startTemp; + int timingFanoutThresh; }; extern bool placer1(Context *ctx, Placer1Cfg cfg); From 4cf8549f5c8b33c9c2363fc7f22c70e9d50357bf Mon Sep 17 00:00:00 2001 From: David Shah Date: Tue, 8 Jan 2019 10:36:30 +0000 Subject: [PATCH 20/59] placer1: Legalise after reaching a diameter, not temperature Signed-off-by: David Shah --- common/placer1.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index b180ebfb..767dbae6 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -317,7 +317,7 @@ class SAPlacer } // Once cooled below legalise threshold, run legalisation and start requiring // legal moves only - if (temp < legalise_temp && require_legal) { + if (diameter < legalise_dia && require_legal) { if (legalise_relative_constraints(ctx)) { // Only increase temperature if something was moved autoplaced.clear(); @@ -329,8 +329,8 @@ 
class SAPlacer else if (cell.second->belStrength < STRENGTH_STRONG) autoplaced.push_back(cell.second); } - temp = post_legalise_temp; - diameter = std::min(M, diameter * post_legalise_dia_scale); + // temp = post_legalise_temp; + // diameter = std::min(M, diameter * post_legalise_dia_scale); ctx->shuffle(autoplaced); // Legalisation is a big change so force a slack redistribution here @@ -901,9 +901,7 @@ class SAPlacer std::vector net_by_udata; std::vector old_udata; bool require_legal = true; - const float legalise_temp = 0.001; - const float post_legalise_temp = 0.002; - const float post_legalise_dia_scale = 1.5; + const int legalise_dia = 4; Placer1Cfg cfg; }; From e36460b83e79eabb06413b1b295f2edb2aab0a09 Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 10 Jan 2019 11:18:47 +0000 Subject: [PATCH 21/59] HeAP: Initial infrastructure Signed-off-by: David Shah --- common/placer_heap.cc | 366 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 common/placer_heap.cc diff --git a/common/placer_heap.cc b/common/placer_heap.cc new file mode 100644 index 00000000..19d5a8e5 --- /dev/null +++ b/common/placer_heap.cc @@ -0,0 +1,366 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2019 David Shah + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * [[cite]] HeAP + * Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson + * https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf + * + * [[cite]] SimPL + * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov + * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf + */ + +#include +#include +#include "log.h" +#include "nextpnr.h" +#include "place_common.h" +#include "util.h" + +NEXTPNR_NAMESPACE_BEGIN + +namespace { +// A simple internal representation for a sparse system of equations Ax = rhs +// This is designed to decouple the functions that build the matrix to the engine that +// solves it, and the representation that requires +template struct EquationSystem +{ + EquationSystem(size_t rows, size_t cols) + { + A.resize(cols); + rhs.resize(rows); + } + + // Simple sparse format, easy to convert to CCS for solver + std::vector>> A; // col -> (row, x[row, col]) sorted by row + std::vector rhs; // RHS vector + + void reset() + { + for (auto &col : A) + col.clear(); + std::fill(rhs.begin(), rhs.end(), T()); + } + + void add_coeff(int row, int col, T val) + { + auto &Ac = A[col]; + // Binary search + int b = 0, e = int(Ac.size()) - 1; + while (b <= e) { + int i = (b + e) / 2; + if (Ac[i].first == row) { + Ac[i].second += val; + return; + } + if (Ac[i].first > row) + e = i - 1; + else + b = i + 1; + } + Ac.insert(Ac.begin() + b, std::make_pair(row, val)); + } + + void add_rhs(int row, T val) { rhs[row] += val; } +}; +} // namespace + +class HeAPPlacer +{ + public: + HeAPPlacer(Context *ctx) : ctx(ctx) {} + + private: + Context *ctx; + + int diameter, max_x, max_y; 
+ std::vector>>> fast_bels; + std::unordered_map> bel_types; + + // For fast handling of heterogeneosity during initial placement without full legalisation, + // for each Bel type this goes from x or y to the nearest x or y where a Bel of a given type exists + // This is particularly important for the iCE40 architecture, where multipliers and BRAM only exist at the + // edges and corners respectively + std::vector> nearest_row_with_bel; + std::vector> nearest_col_with_bel; + + // In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom + // structure instead + struct CellLocation + { + int x, y; + bool locked, global; + }; + std::unordered_map cell_locs; + // The set of cells that we will actually place. This excludes locked cells and children cells of macros/chains + // (only the root of each macro is placed.) + std::vector place_cells; + + // Place cells with the BEL attribute set to constrain them + void place_constraints() + { + size_t placed_cells = 0; + // Initial constraints placer + for (auto &cell_entry : ctx->cells) { + CellInfo *cell = cell_entry.second.get(); + auto loc = cell->attrs.find(ctx->id("BEL")); + if (loc != cell->attrs.end()) { + std::string loc_name = loc->second; + BelId bel = ctx->getBelByName(ctx->id(loc_name)); + if (bel == BelId()) { + log_error("No Bel named \'%s\' located for " + "this chip (processing BEL attribute on \'%s\')\n", + loc_name.c_str(), cell->name.c_str(ctx)); + } + + IdString bel_type = ctx->getBelType(bel); + if (bel_type != cell->type) { + log_error("Bel \'%s\' of type \'%s\' does not match cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + if (!ctx->isValidBelForCell(cell, bel)) { + log_error("Bel \'%s\' of type \'%s\' is not valid for cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + + auto bound_cell = 
ctx->getBoundBelCell(bel); + if (bound_cell) { + log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", + cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); + } + + ctx->bindBel(bel, cell, STRENGTH_USER); + placed_cells++; + } + } + int constr_placed_cells = placed_cells; + log_info("Placed %d cells based on constraints.\n", int(placed_cells)); + ctx->yield(); + } + + // Construct the fast_bels, nearest_row_with_bel and nearest_col_with_bel + void build_fast_bels() + { + + int num_bel_types = 0; + for (auto bel : ctx->getBels()) { + IdString type = ctx->getBelType(bel); + if (bel_types.find(type) == bel_types.end()) { + bel_types[type] = std::tuple(num_bel_types++, 1); + } else { + std::get<1>(bel_types.at(type))++; + } + } + for (auto bel : ctx->getBels()) { + if (!ctx->checkBelAvail(bel)) + continue; + Loc loc = ctx->getBelLocation(bel); + IdString type = ctx->getBelType(bel); + int type_idx = std::get<0>(bel_types.at(type)); + if (int(fast_bels.size()) < type_idx + 1) + fast_bels.resize(type_idx + 1); + if (int(fast_bels.at(type_idx).size()) < (loc.x + 1)) + fast_bels.at(type_idx).resize(loc.x + 1); + if (int(fast_bels.at(type_idx).at(loc.x).size()) < (loc.y + 1)) + fast_bels.at(type_idx).at(loc.x).resize(loc.y + 1); + max_x = std::max(max_x, loc.x); + max_y = std::max(max_y, loc.y); + fast_bels.at(type_idx).at(loc.x).at(loc.y).push_back(bel); + } + diameter = std::max(max_x, max_y) + 1; + nearest_row_with_bel.resize(num_bel_types, std::vector(max_y + 1, -1)); + nearest_col_with_bel.resize(num_bel_types, std::vector(max_x + 1, -1)); + for (auto bel : ctx->getBels()) { + if (!ctx->checkBelAvail(bel)) + continue; + Loc loc = ctx->getBelLocation(bel); + int type_idx = std::get<0>(bel_types.at(ctx->getBelType(bel))); + auto &nr = nearest_row_with_bel.at(type_idx), &nc = nearest_col_with_bel.at(type_idx); + // Traverse outwards through nearest_row_with_bel and nearest_col_with_bel, stopping once + // 
another row/col is already recorded as being nearer + for (int x = loc.x; x <= max_x; x++) { + if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (x - loc.x)) + break; + nc.at(x) = loc.x; + } + for (int x = loc.x - 1; x >= 0; x--) { + if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (loc.x - x)) + break; + nc.at(x) = loc.x; + } + for (int y = loc.y; y <= max_y; y++) { + if (nr.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (y - loc.y)) + break; + nr.at(y) = loc.y; + } + for (int y = loc.y - 1; y >= 0; y--) { + if (nc.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (loc.y - y)) + break; + nc.at(y) = loc.y; + } + } + } + + // Build up a random initial placement, without regard to legality + // FIXME: Are there better approaches to the initial placement (e.g. greedy?) + void seed_placement() + { + std::unordered_map> available_bels; + for (auto bel : ctx->getBels()) { + if (!ctx->checkBelAvail(bel)) + continue; + available_bels[ctx->getBelType(bel)].push_back(bel); + } + for (auto &ab : available_bels) + ctx->shuffle(ab.second); + int placed_cell_count = 0; + for (auto cell : sorted(ctx->cells)) { + CellInfo *ci = cell.second; + if (ci->bel != BelId()) { + Loc loc = ctx->getBelLocation(ci->bel); + cell_locs[cell.first].x = loc.x; + cell_locs[cell.first].y = loc.y; + cell_locs[cell.first].locked = true; + cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel); + + } else if (ci->constr_parent == nullptr) { + if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty()) + log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx), + ci->type.c_str(ctx)); + BelId bel = available_bels.at(ci->type).back(); + available_bels.at(ci->type).pop_back(); + Loc loc = ctx->getBelLocation(bel); + cell_locs[cell.first].x = loc.x; + cell_locs[cell.first].y = loc.y; + cell_locs[cell.first].locked = false; + cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); + ci->udata = placed_cell_count++; + place_cells.push_back(ci); + } + 
} + } + + // Update the location of all children of a chain + void update_chain(CellInfo *cell) + { + const auto &base = cell_locs[cell->name]; + for (auto child : cell->constr_children) { + if (child->constr_x != child->UNCONSTR) + cell_locs[child->name].x = base.x + child->constr_x; + else + cell_locs[child->name].x = base.x; // better handling of UNCONSTR? + if (child->constr_y != child->UNCONSTR) + cell_locs[child->name].y = base.y + child->constr_y; + else + cell_locs[child->name].y = base.y; // better handling of UNCONSTR? + if (!child->constr_children.empty()) + update_chain(child); + } + } + + // Update all chains + void update_all_chains() + { + for (auto cell : place_cells) { + if (!cell->constr_children.empty()) + update_chain(cell); + } + } + + // Run a function on all ports of a net - including the driver and all users + template void foreach_port(NetInfo *net, Tf func) + { + if (net->driver.cell != nullptr) + func(net->driver); + for (auto &user : net->users) + func(user); + } + + // Build the system of equations for either X or Y + void build_equations(EquationSystem &es, bool yaxis) + { + // Return the x or y position of a cell, depending on ydir + auto cell_pos = [&](CellInfo *cell) { return yaxis ? 
cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; + + es.reset(); + + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + if (ni->driver.cell == nullptr) + continue; + if (ni->users.empty()) + continue; + if (cell_locs.at(ni->driver.cell->name).global) + continue; + // Find the bounds of the net in this axis, and the ports that correspond to these bounds + PortRef *lbport = nullptr, *ubport = nullptr; + int lbpos = std::numeric_limits::max(), ubpos = std::numeric_limits::min(); + foreach_port(ni, [&](PortRef &port) { + int pos = cell_pos(port.cell); + if (pos < lbpos) { + lbpos = pos; + lbport = &port; + } + if (pos > ubpos) { + ubpos = pos; + ubport = &port; + } + }); + // Add all relevant connections to the matrix + foreach_port(ni, [&](PortRef &port) { + int this_pos = cell_pos(port.cell); + auto process_arc = [&](PortRef *other) { + if (other == &port) + return; + int o_pos = cell_pos(other->cell); + if (o_pos == this_pos) + return; // FIXME: or clamp to 1? + double weight = 1. 
/ (ni->users.size() * std::abs(o_pos - this_pos)); + // FIXME: add criticality to weighting + + // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, + // if the other end isn't fixed + if (!cell_locs.at(port.cell->name).locked) { + es.add_coeff(port.cell->udata, port.cell->udata, weight); + if (!cell_locs.at(other->cell->name).locked) + es.add_coeff(other->cell->udata, port.cell->udata, -weight); + } else { + // Add our fixed position to the other end's RHS + if (!cell_locs.at(other->cell->name).locked) + es.add_rhs(other->cell->udata, this_pos * weight); + } + // Opposite for the other end of the connection + if (!cell_locs.at(other->cell->name).locked) { + es.add_coeff(other->cell->udata, other->cell->udata, weight); + if (!cell_locs.at(port.cell->name).locked) + es.add_coeff(port.cell->udata, other->cell->udata, -weight); + } else { + // Add our fixed position to the other end's RHS + if (!cell_locs.at(port.cell->name).locked) + es.add_rhs(port.cell->udata, this_pos * weight); + } + }; + process_arc(lbport); + process_arc(ubport); + }); + } + } +}; + +NEXTPNR_NAMESPACE_END \ No newline at end of file From ea56dc9d084a694450d995d147b18a4de86e8b7c Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 10 Jan 2019 16:42:29 +0000 Subject: [PATCH 22/59] HeAP: Add TAUCS wrapper and integration Signed-off-by: David Shah --- CMakeLists.txt | 6 ++++-- common/placer_heap.cc | 47 ++++++++++++++++++++++++++++++++++++++++++- common/placer_heap.h | 34 +++++++++++++++++++++++++++++++ common/placer_math.c | 43 +++++++++++++++++++++++++++++++++++++++ common/placer_math.h | 43 +++++++++++++++++++++++++++++++++++++++ ecp5/arch.cc | 2 +- ice40/arch.cc | 6 ++++-- 7 files changed, 175 insertions(+), 6 deletions(-) create mode 100644 common/placer_heap.h create mode 100644 common/placer_math.c create mode 100644 common/placer_math.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f29d132..2fbfa735 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -122,6 +122,8 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/common/version.h.in ${CMAKE_CURRENT_BINARY_DIR}/generated/version.h ) +add_subdirectory(3rdparty/taucs ${CMAKE_CURRENT_BINARY_DIR}/generated/taucs EXCLUDE_FROM_ALL) + if (BUILD_PYTHON) # Find Boost::Python of a suitable version in a cross-platform way # Some distributions (Arch) call it libboost_python3, others such as Ubuntu @@ -180,7 +182,7 @@ if (BUILD_PYTHON) endif () endif() -include_directories(common/ json/ ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}) +include_directories(common/ json/ ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} 3rdparty/taucs/src ${CMAKE_CURRENT_BINARY_DIR}/generated/taucs/build/linux) aux_source_directory(common/ COMMON_SRC_FILES) aux_source_directory(json/ JSON_PARSER_FILES) set(COMMON_FILES ${COMMON_SRC_FILES} ${JSON_PARSER_FILES}) @@ -244,7 +246,7 @@ foreach (family ${ARCH}) # Include family-specific source files to all family targets and set defines appropriately target_include_directories(${target} PRIVATE ${family}/ ${CMAKE_CURRENT_BINARY_DIR}/generated/) target_compile_definitions(${target} PRIVATE NEXTPNR_NAMESPACE=nextpnr_${family} ARCH_${ufamily} ARCHNAME=${family}) - target_link_libraries(${target} LINK_PUBLIC ${Boost_LIBRARIES} ${link_param}) + target_link_libraries(${target} LINK_PUBLIC ${Boost_LIBRARIES} taucs ${link_param}) if (NOT MSVC) target_link_libraries(${target} LINK_PUBLIC pthread) endif() diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 19d5a8e5..9f49e552 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -25,12 +25,13 @@ */ #include +#include #include #include "log.h" #include "nextpnr.h" #include "place_common.h" +#include "placer_math.h" #include "util.h" - NEXTPNR_NAMESPACE_BEGIN namespace { @@ -76,13 +77,44 @@ template struct EquationSystem } void add_rhs(int row, T val) { rhs[row] += val; } + + void solve(std::vector &x) + { + int nnz = std::accumulate(A.begin(), A.end(), 0, + [](int a, const 
std::vector> &vec) { return a + int(vec.size()); }); + taucif_system *sys = taucif_create_system(int(rhs.size()), int(A.size()), nnz); + for (int col = 0; col < int(A.size()); col++) { + auto &Ac = A[col]; + for (auto &el : Ac) { + if (col <= el.first) + taucif_set_matrix_value(sys, el.first, col, el.second); + // FIXME: in debug mode, assert really is symmetric + } + } + taucif_solve_system(sys, x.data(), rhs.data()); + taucif_free_system(sys); + } }; + } // namespace class HeAPPlacer { public: HeAPPlacer(Context *ctx) : ctx(ctx) {} + bool place() + { + taucif_init_solver(); + place_constraints(); + build_fast_bels(); + seed_placement(); + update_all_chains(); + + EquationSystem es(place_cells.size(), place_cells.size()); + build_equations(es, false); + solve_equations(es, false); + return true; + } private: Context *ctx; @@ -361,6 +393,19 @@ class HeAPPlacer }); } } + + // Build the system of equations for either X or Y + void solve_equations(EquationSystem &es, bool yaxis) + { + // Return the x or y position of a cell, depending on ydir + auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; + build_equations(es, yaxis); + std::vector vals; + std::transform(place_cells.begin(), place_cells.end(), std::back_inserter(vals), cell_pos); + es.solve(vals); + } }; +bool placer_heap(Context *ctx) { return HeAPPlacer(ctx).place(); } + NEXTPNR_NAMESPACE_END \ No newline at end of file diff --git a/common/placer_heap.h b/common/placer_heap.h new file mode 100644 index 00000000..5eb8a9ba --- /dev/null +++ b/common/placer_heap.h @@ -0,0 +1,34 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2019 David Shah + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * [[cite]] HeAP + * Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson + * https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf + * + * [[cite]] SimPL + * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov + * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf + */ + +#ifndef PLACER_HEAP_H +#define PLACER_HEAP +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN +extern bool placer_heap(Context *ctx); +NEXTPNR_NAMESPACE_END +#endif \ No newline at end of file diff --git a/common/placer_math.c b/common/placer_math.c new file mode 100644 index 00000000..456bc0a1 --- /dev/null +++ b/common/placer_math.c @@ -0,0 +1,43 @@ +#include "taucs.h" +#include "placer_math.h" +#include + +void taucif_init_solver() { + taucs_logfile("stdout"); +} + +struct taucif_system { + taucs_ccs_matrix* mat; + int ccs_i, ccs_col; +}; + +struct taucif_system *taucif_create_system(int rows, int cols, int n_nonzero) { + struct taucif_system *sys = taucs_malloc(sizeof(struct taucif_system)); + sys->mat = taucs_ccs_create(cols, rows, n_nonzero, TAUCS_DOUBLE | TAUCS_SYMMETRIC); + // Internal pointers + sys->ccs_i = 0; + sys->ccs_col = -1; + return sys; +}; + +void taucif_set_matrix_value(struct taucif_system *sys, int row, int col, double value) { + while(sys->ccs_col < col) { + sys->mat->colptr[++sys->ccs_col] = sys->ccs_i; + } + sys->mat->rowind[sys->ccs_i] = row; + sys->mat->values.d[sys->ccs_i++] = value; +} 
+ +void taucif_solve_system(struct taucif_system *sys, double *x, double *rhs) { + // FIXME: preconditioner, droptol?? + taucs_ccs_matrix* precond_mat = taucs_ccs_factor_llt(sys->mat, 1e-3, 0); + // FIXME: itermax, convergetol + int cjres = taucs_conjugate_gradients(sys->mat, taucs_ccs_solve_llt, precond_mat, x, rhs, 1000, 1e-6); + taucs_ccs_free(precond_mat); +} + +void taucif_free_system(struct taucif_system *sys) { + taucs_ccs_free(sys->mat); + taucs_free(sys->mat); +} + diff --git a/common/placer_math.h b/common/placer_math.h new file mode 100644 index 00000000..3782e99f --- /dev/null +++ b/common/placer_math.h @@ -0,0 +1,43 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2019 David Shah + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + */ + +#ifndef PLACER_MATH_H +#define PLACER_MATH_H +// This shim is needed because Tauc is mutually incompatible with modern C++ (implementing macros and functions +// that collide with max, min, etc) +#ifdef __cplusplus +extern "C" { +#endif +extern void taucif_init_solver(); + +struct taucif_system; + +extern struct taucif_system *taucif_create_system(int rows, int cols, int n_nonzero); + +extern void taucif_set_matrix_value(struct taucif_system *sys, int row, int col, double value); + +extern void taucif_solve_system(struct taucif_system *sys, double *x, double *rhs); + +extern void taucif_free_system(struct taucif_system *sys); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/ecp5/arch.cc b/ecp5/arch.cc index da0f7b1a..8ba8d28e 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -458,6 +458,7 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); return (130 - 25 * args.speed) * (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); + } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const @@ -467,7 +468,6 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const return 0; auto driver_loc = getBelLocation(driver.cell->bel); auto sink_loc = getBelLocation(sink.cell->bel); - // Encourage use of direct interconnect if (driver_loc.x == sink_loc.x && driver_loc.y == sink_loc.y) { if ((sink.port == id_A0 || sink.port == id_A1) && (driver.port == id_F1) && diff --git a/ice40/arch.cc b/ice40/arch.cc index fbe882fc..5688b6e6 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -26,10 +26,10 @@ #include "log.h" #include "nextpnr.h" #include "placer1.h" +#include "placer_heap.h" #include "router1.h" #include "timing_opt.h" #include "util.h" - NEXTPNR_NAMESPACE_BEGIN // ----------------------------------------------------------------------- @@ -671,7 +671,9 @@ bool 
Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay bool Arch::place() { - if (!placer1(getCtx(), Placer1Cfg(getCtx()))) + // if (!placer1(getCtx(), Placer1Cfg(getCtx()))) + // return false; + if (!placer_heap(getCtx())) return false; if (bool_or_default(settings, id("opt_timing"), false)) { TimingOptCfg tocfg(getCtx()); From d5cfd38179bf00f61ac0e8d8fcb78382733773dd Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 10 Jan 2019 19:10:47 +0000 Subject: [PATCH 23/59] HeAP: Successful solver convergance Signed-off-by: David Shah --- common/placer_heap.cc | 116 ++++++++++++++++++++++++++++++++++-------- common/placer_math.c | 20 ++++++-- common/placer_math.h | 6 ++- 3 files changed, 115 insertions(+), 27 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 9f49e552..d348d260 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -59,16 +59,16 @@ template struct EquationSystem void add_coeff(int row, int col, T val) { - auto &Ac = A[col]; + auto &Ac = A.at(col); // Binary search int b = 0, e = int(Ac.size()) - 1; while (b <= e) { int i = (b + e) / 2; - if (Ac[i].first == row) { - Ac[i].second += val; + if (Ac.at(i).first == row) { + Ac.at(i).second += val; return; } - if (Ac[i].first > row) + if (Ac.at(i).first > row) e = i - 1; else b = i + 1; @@ -80,19 +80,29 @@ template struct EquationSystem void solve(std::vector &x) { + NPNR_ASSERT(x.size() == A.size()); + int nnz = std::accumulate(A.begin(), A.end(), 0, [](int a, const std::vector> &vec) { return a + int(vec.size()); }); taucif_system *sys = taucif_create_system(int(rhs.size()), int(A.size()), nnz); for (int col = 0; col < int(A.size()); col++) { auto &Ac = A[col]; for (auto &el : Ac) { - if (col <= el.first) - taucif_set_matrix_value(sys, el.first, col, el.second); + if (col <= el.first) { + // log_info("%d %d %f\n", el.first, col, el.second); + taucif_add_matrix_value(sys, el.first, col, el.second); + } + // FIXME: in debug mode, assert really is 
symmetric } } - taucif_solve_system(sys, x.data(), rhs.data()); + taucif_finalise_matrix(sys); + int result = taucif_solve_system(sys, x.data(), rhs.data()); + NPNR_ASSERT(result == 0); taucif_free_system(sys); + + // for (int i = 0; i < int(x.size()); i++) + // log_info("x[%d] = %f\n", i, x.at(i)); } }; @@ -104,22 +114,38 @@ class HeAPPlacer HeAPPlacer(Context *ctx) : ctx(ctx) {} bool place() { + ctx->lock(); taucif_init_solver(); place_constraints(); build_fast_bels(); seed_placement(); update_all_chains(); - EquationSystem es(place_cells.size(), place_cells.size()); - build_equations(es, false); - solve_equations(es, false); + for (int i = 0; i < 20; i++) { + EquationSystem esx(place_cells.size(), place_cells.size()); + build_equations(esx, false); + // log_info("x-axis\n"); + solve_equations(esx, false); + + EquationSystem esy(place_cells.size(), place_cells.size()); + build_equations(esy, true); + // log_info("y-axis\n"); + solve_equations(esy, true); + + update_all_chains(); + + wirelen_t hpwl = total_hpwl(); + log_info("Initial placer iter %d, hpwl = %d\n", i, int(hpwl)); + } + + ctx->unlock(); return true; } private: Context *ctx; - int diameter, max_x, max_y; + int max_x = 0, max_y = 0; std::vector>>> fast_bels; std::unordered_map> bel_types; @@ -215,7 +241,7 @@ class HeAPPlacer max_y = std::max(max_y, loc.y); fast_bels.at(type_idx).at(loc.x).at(loc.y).push_back(bel); } - diameter = std::max(max_x, max_y) + 1; + nearest_row_with_bel.resize(num_bel_types, std::vector(max_y + 1, -1)); nearest_col_with_bel.resize(num_bel_types, std::vector(max_x + 1, -1)); for (auto bel : ctx->getBels()) { @@ -237,18 +263,29 @@ class HeAPPlacer nc.at(x) = loc.x; } for (int y = loc.y; y <= max_y; y++) { - if (nr.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (y - loc.y)) + if (nr.at(y) == -1 || std::abs(loc.y - nr.at(y)) <= (y - loc.y)) break; nr.at(y) = loc.y; } for (int y = loc.y - 1; y >= 0; y--) { - if (nc.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (loc.y - y)) + if 
(nr.at(y) == -1 || std::abs(loc.y - nr.at(y)) <= (loc.y - y)) break; - nc.at(y) = loc.y; + nr.at(y) = loc.y; } } } + // Check if a cell has any meaningful connectivity + bool has_connectivity(CellInfo *cell) + { + for (auto port : cell->ports) { + if (port.second.net != nullptr && port.second.net->driver.cell != nullptr && + !port.second.net->users.empty()) + return true; + } + return false; + } + // Build up a random initial placement, without regard to legality // FIXME: Are there better approaches to the initial placement (e.g. greedy?) void seed_placement() @@ -264,13 +301,13 @@ class HeAPPlacer int placed_cell_count = 0; for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; + ci->udata = -1; if (ci->bel != BelId()) { Loc loc = ctx->getBelLocation(ci->bel); cell_locs[cell.first].x = loc.x; cell_locs[cell.first].y = loc.y; cell_locs[cell.first].locked = true; cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel); - } else if (ci->constr_parent == nullptr) { if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty()) log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx), @@ -282,8 +319,14 @@ class HeAPPlacer cell_locs[cell.first].y = loc.y; cell_locs[cell.first].locked = false; cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); - ci->udata = placed_cell_count++; - place_cells.push_back(ci); + // FIXME + if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO")) { + ci->udata = placed_cell_count++; + place_cells.push_back(ci); + } else { + ctx->bindBel(bel, ci, STRENGTH_STRONG); + cell_locs[cell.first].locked = true; + } } } } @@ -354,6 +397,8 @@ class HeAPPlacer ubport = &port; } }); + NPNR_ASSERT(lbport != nullptr); + NPNR_ASSERT(ubport != nullptr); // Add all relevant connections to the matrix foreach_port(ni, [&](PortRef &port) { int this_pos = cell_pos(port.cell); @@ -361,9 +406,9 @@ class HeAPPlacer if (other == &port) return; int o_pos = cell_pos(other->cell); 
- if (o_pos == this_pos) - return; // FIXME: or clamp to 1? - double weight = 1. / (ni->users.size() * std::abs(o_pos - this_pos)); + // if (o_pos == this_pos) + // return; // FIXME: or clamp to 1? + double weight = 1. / (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); // FIXME: add criticality to weighting // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, @@ -403,6 +448,35 @@ class HeAPPlacer std::vector vals; std::transform(place_cells.begin(), place_cells.end(), std::back_inserter(vals), cell_pos); es.solve(vals); + for (size_t i = 0; i < vals.size(); i++) + if (yaxis) + cell_locs.at(place_cells.at(i)->name).y = int(vals.at(i) + 0.5); + else + cell_locs.at(place_cells.at(i)->name).x = int(vals.at(i) + 0.5); + } + + // Compute HPWL + wirelen_t total_hpwl() + { + wirelen_t hpwl = 0; + for (auto net : sorted(ctx->nets)) { + NetInfo *ni = net.second; + if (ni->driver.cell == nullptr) + continue; + CellLocation &drvloc = cell_locs.at(ni->driver.cell->name); + if (drvloc.global) + continue; + int xmin = drvloc.x, xmax = drvloc.x, ymin = drvloc.y, ymax = drvloc.y; + for (auto &user : ni->users) { + CellLocation &usrloc = cell_locs.at(user.cell->name); + xmin = std::min(xmin, usrloc.x); + xmax = std::max(xmax, usrloc.x); + ymin = std::min(ymin, usrloc.y); + ymax = std::max(ymax, usrloc.y); + } + hpwl += (xmax - xmin) + (ymax - ymin); + } + return hpwl; } }; diff --git a/common/placer_math.c b/common/placer_math.c index 456bc0a1..53b6190d 100644 --- a/common/placer_math.c +++ b/common/placer_math.c @@ -1,6 +1,7 @@ #include "taucs.h" #include "placer_math.h" #include +#include void taucif_init_solver() { taucs_logfile("stdout"); @@ -13,14 +14,15 @@ struct taucif_system { struct taucif_system *taucif_create_system(int rows, int cols, int n_nonzero) { struct taucif_system *sys = taucs_malloc(sizeof(struct taucif_system)); - sys->mat = taucs_ccs_create(cols, rows, n_nonzero, TAUCS_DOUBLE | TAUCS_SYMMETRIC); + 
sys->mat = taucs_ccs_create(cols, rows, n_nonzero, TAUCS_DOUBLE | TAUCS_SYMMETRIC | TAUCS_LOWER); // Internal pointers sys->ccs_i = 0; sys->ccs_col = -1; return sys; }; -void taucif_set_matrix_value(struct taucif_system *sys, int row, int col, double value) { +void taucif_add_matrix_value(struct taucif_system *sys, int row, int col, double value) { + assert(sys->ccs_col <= col); while(sys->ccs_col < col) { sys->mat->colptr[++sys->ccs_col] = sys->ccs_i; } @@ -28,16 +30,26 @@ void taucif_set_matrix_value(struct taucif_system *sys, int row, int col, double sys->mat->values.d[sys->ccs_i++] = value; } -void taucif_solve_system(struct taucif_system *sys, double *x, double *rhs) { +void taucif_finalise_matrix(struct taucif_system *sys) { + sys->mat->colptr[++sys->ccs_col] = sys->ccs_i; +#if 0 + taucs_ccs_write_ijv(sys->mat, "matrix.ijv"); +#endif +} + +int taucif_solve_system(struct taucif_system *sys, double *x, double *rhs) { // FIXME: preconditioner, droptol?? taucs_ccs_matrix* precond_mat = taucs_ccs_factor_llt(sys->mat, 1e-3, 0); + if (precond_mat == NULL) + return -1; // FIXME: itermax, convergetol int cjres = taucs_conjugate_gradients(sys->mat, taucs_ccs_solve_llt, precond_mat, x, rhs, 1000, 1e-6); taucs_ccs_free(precond_mat); + return 0; } void taucif_free_system(struct taucif_system *sys) { taucs_ccs_free(sys->mat); - taucs_free(sys->mat); + taucs_free(sys); } diff --git a/common/placer_math.h b/common/placer_math.h index 3782e99f..c197036c 100644 --- a/common/placer_math.h +++ b/common/placer_math.h @@ -30,9 +30,11 @@ struct taucif_system; extern struct taucif_system *taucif_create_system(int rows, int cols, int n_nonzero); -extern void taucif_set_matrix_value(struct taucif_system *sys, int row, int col, double value); +extern void taucif_add_matrix_value(struct taucif_system *sys, int row, int col, double value); -extern void taucif_solve_system(struct taucif_system *sys, double *x, double *rhs); +extern void taucif_finalise_matrix(struct taucif_system *sys); + 
+extern int taucif_solve_system(struct taucif_system *sys, double *x, double *rhs); extern void taucif_free_system(struct taucif_system *sys); From d1808c2594d2ec5de23f7ff8da96af2ac5cfdd1f Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 11 Jan 2019 11:31:56 +0000 Subject: [PATCH 24/59] HeAP: Fix how initial placement handles chains Signed-off-by: David Shah --- common/placer_heap.cc | 76 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 67 insertions(+), 9 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index d348d260..d3ba63bc 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -120,8 +120,11 @@ class HeAPPlacer build_fast_bels(); seed_placement(); update_all_chains(); - + wirelen_t hpwl = total_hpwl(); + log_info("Initial placer starting hpwl = %d\n", int(hpwl)); for (int i = 0; i < 20; i++) { + setup_solve_cells(); + EquationSystem esx(place_cells.size(), place_cells.size()); build_equations(esx, false); // log_info("x-axis\n"); @@ -134,7 +137,7 @@ class HeAPPlacer update_all_chains(); - wirelen_t hpwl = total_hpwl(); + hpwl = total_hpwl(); log_info("Initial placer iter %d, hpwl = %d\n", i, int(hpwl)); } @@ -168,6 +171,17 @@ class HeAPPlacer // (only the root of each macro is placed.) 
std::vector place_cells; + // The cells in the current equation being solved (a subset of place_cells in some cases, where we only place + // cells of a certain type) + std::vector solve_cells; + + // For cells in a chain, this is the ultimate root cell of the chain (sometimes this is not constr_parent + // where chains are within chains + std::unordered_map chain_root; + + // The offset from chain_root to a cell in the chain + std::unordered_map> cell_offsets; + // Place cells with the BEL attribute set to constrain them void place_constraints() { @@ -298,10 +312,8 @@ class HeAPPlacer } for (auto &ab : available_bels) ctx->shuffle(ab.second); - int placed_cell_count = 0; for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; - ci->udata = -1; if (ci->bel != BelId()) { Loc loc = ctx->getBelLocation(ci->bel); cell_locs[cell.first].x = loc.x; @@ -321,7 +333,6 @@ class HeAPPlacer cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); // FIXME if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO")) { - ci->udata = placed_cell_count++; place_cells.push_back(ci); } else { ctx->bindBel(bel, ci, STRENGTH_STRONG); @@ -331,8 +342,28 @@ class HeAPPlacer } } + // Setup the cells to be solved, returns the number of rows + int setup_solve_cells(std::unordered_set *celltypes = nullptr) { + int row = 0; + solve_cells.clear(); + // First clear the udata of all cells + for (auto cell : sorted(ctx->cells)) + cell.second->udata = dont_solve; + // Then update cells to be placed, which excludes cell children + for (auto cell : place_cells) { + if (celltypes && !celltypes->count(cell->type)) + continue; + cell->udata = row++; + solve_cells.push_back(cell); + } + // Finally, update the udata of children + for (auto chained : chain_root) + ctx->cells.at(chained.first)->udata = chained.second->udata; + return row; + } + // Update the location of all children of a chain - void update_chain(CellInfo *cell) + void update_chain(CellInfo *cell, CellInfo *root) 
{ const auto &base = cell_locs[cell->name]; for (auto child : cell->constr_children) { @@ -344,8 +375,9 @@ class HeAPPlacer cell_locs[child->name].y = base.y + child->constr_y; else cell_locs[child->name].y = base.y; // better handling of UNCONSTR? + chain_root[cell->name] = root; if (!child->constr_children.empty()) - update_chain(child); + update_chain(child, root); } } @@ -354,7 +386,7 @@ class HeAPPlacer { for (auto cell : place_cells) { if (!cell->constr_children.empty()) - update_chain(cell); + update_chain(cell, cell); } } @@ -399,6 +431,22 @@ class HeAPPlacer }); NPNR_ASSERT(lbport != nullptr); NPNR_ASSERT(ubport != nullptr); + + auto stamp_equation = [&](PortRef &var, PortRef &eqn, double weight) { + if (eqn.cell->udata == dont_solve) + return; + int row = eqn.cell->udata; + int v_pos = cell_pos(var.cell); + if (var.cell->udata != dont_solve) { + es.add_coeff(row, var.cell->udata, weight); + } else { + es.add_rhs(row, -v_pos * weight); + } + if (cell_offsets.count(var.cell->name)) { + es.add_rhs(row, -(yaxis ? 
cell_offsets.at(var.cell->name).second : cell_offsets.at(var.cell->name).first) * weight); + } + }; + // Add all relevant connections to the matrix foreach_port(ni, [&](PortRef &port) { int this_pos = cell_pos(port.cell); @@ -413,7 +461,13 @@ class HeAPPlacer // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, // if the other end isn't fixed - if (!cell_locs.at(port.cell->name).locked) { + stamp_equation(port, port, weight); + stamp_equation(port, *other, -weight); + stamp_equation(*other, *other, weight); + stamp_equation(*other, port, -weight); + +/* + if (port.cell->udata != -1) { es.add_coeff(port.cell->udata, port.cell->udata, weight); if (!cell_locs.at(other->cell->name).locked) es.add_coeff(other->cell->udata, port.cell->udata, -weight); @@ -432,6 +486,7 @@ class HeAPPlacer if (!cell_locs.at(port.cell->name).locked) es.add_rhs(port.cell->udata, this_pos * weight); } +*/ }; process_arc(lbport); process_arc(ubport); @@ -478,6 +533,9 @@ class HeAPPlacer } return hpwl; } + + typedef decltype(CellInfo::udata) cell_udata_t; + cell_udata_t dont_solve = std::numeric_limits::max(); }; bool placer_heap(Context *ctx) { return HeAPPlacer(ctx).place(); } From 4d2906378f36cd0131fc1a8dd30ad40980d4c0bb Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 11 Jan 2019 11:59:34 +0000 Subject: [PATCH 25/59] HeAP: Region finder for spreading and strict legaliser Signed-off-by: David Shah --- common/placer_heap.cc | 490 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 454 insertions(+), 36 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index d3ba63bc..6cd0459d 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -26,6 +26,7 @@ #include #include +#include #include #include "log.h" #include "nextpnr.h" @@ -125,12 +126,12 @@ class HeAPPlacer for (int i = 0; i < 20; i++) { setup_solve_cells(); - EquationSystem esx(place_cells.size(), place_cells.size()); + EquationSystem 
esx(solve_cells.size(), solve_cells.size()); build_equations(esx, false); // log_info("x-axis\n"); solve_equations(esx, false); - EquationSystem esy(place_cells.size(), place_cells.size()); + EquationSystem esy(solve_cells.size(), solve_cells.size()); build_equations(esy, true); // log_info("y-axis\n"); solve_equations(esy, true); @@ -141,6 +142,48 @@ class HeAPPlacer log_info("Initial placer iter %d, hpwl = %d\n", i, int(hpwl)); } + // legalise_with_cuts(true); + CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); + NPNR_ASSERT(false); + + bool valid = false; + wirelen_t solved_hpwl = 0, legal_hpwl = 1, best_hpwl = std::numeric_limits::max(); + int iter = 0, stalled = 0; + while (!valid || (stalled < 5 && (solved_hpwl < legal_hpwl * 0.8))) { + if ((solved_hpwl < legal_hpwl * 0.8) || (stalled > 5)) { + stalled = 0; + best_hpwl = std::numeric_limits::max(); + valid = true; + } + setup_solve_cells(); + + EquationSystem esx(solve_cells.size(), solve_cells.size()); + build_equations(esx, false, iter); + // log_info("x-axis\n"); + solve_equations(esx, false); + + EquationSystem esy(solve_cells.size(), solve_cells.size()); + build_equations(esy, true, iter); + // log_info("y-axis\n"); + solve_equations(esy, true); + solved_hpwl = total_hpwl(); + log_info("Solved HPWL = %d\n", int(solved_hpwl)); + + update_all_chains(); + legalise_placement_simple(valid); + update_all_chains(); + + legal_hpwl = total_hpwl(); + log_info("Legalised HPWL = %d\n", int(legal_hpwl)); + if (legal_hpwl < best_hpwl) { + best_hpwl = legal_hpwl; + stalled = 0; + } else { + ++stalled; + } + ctx->yield(); + ++iter; + } ctx->unlock(); return true; } @@ -177,7 +220,8 @@ class HeAPPlacer // For cells in a chain, this is the ultimate root cell of the chain (sometimes this is not constr_parent // where chains are within chains - std::unordered_map chain_root; + std::unordered_map chain_root; + std::unordered_map chain_size; // The offset from chain_root to a cell in the chain std::unordered_map> 
cell_offsets; @@ -267,22 +311,22 @@ class HeAPPlacer // Traverse outwards through nearest_row_with_bel and nearest_col_with_bel, stopping once // another row/col is already recorded as being nearer for (int x = loc.x; x <= max_x; x++) { - if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (x - loc.x)) + if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (x - loc.x)) break; nc.at(x) = loc.x; } for (int x = loc.x - 1; x >= 0; x--) { - if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (loc.x - x)) + if (nc.at(x) != -1 && std::abs(loc.x - nc.at(x)) <= (loc.x - x)) break; nc.at(x) = loc.x; } for (int y = loc.y; y <= max_y; y++) { - if (nr.at(y) == -1 || std::abs(loc.y - nr.at(y)) <= (y - loc.y)) + if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (y - loc.y)) break; nr.at(y) = loc.y; } for (int y = loc.y - 1; y >= 0; y--) { - if (nr.at(y) == -1 || std::abs(loc.y - nr.at(y)) <= (loc.y - y)) + if (nr.at(y) != -1 && std::abs(loc.y - nr.at(y)) <= (loc.y - y)) break; nr.at(y) = loc.y; } @@ -343,7 +387,8 @@ class HeAPPlacer } // Setup the cells to be solved, returns the number of rows - int setup_solve_cells(std::unordered_set *celltypes = nullptr) { + int setup_solve_cells(std::unordered_set *celltypes = nullptr) + { int row = 0; solve_cells.clear(); // First clear the udata of all cells @@ -367,6 +412,7 @@ class HeAPPlacer { const auto &base = cell_locs[cell->name]; for (auto child : cell->constr_children) { + chain_size[root->name]++; if (child->constr_x != child->UNCONSTR) cell_locs[child->name].x = base.x + child->constr_x; else @@ -385,6 +431,7 @@ class HeAPPlacer void update_all_chains() { for (auto cell : place_cells) { + chain_size[cell->name] = 1; if (!cell->constr_children.empty()) update_chain(cell, cell); } @@ -400,7 +447,7 @@ class HeAPPlacer } // Build the system of equations for either X or Y - void build_equations(EquationSystem &es, bool yaxis) + void build_equations(EquationSystem &es, bool yaxis, int iter = -1) { // Return the x or y position of a 
cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; @@ -443,7 +490,9 @@ class HeAPPlacer es.add_rhs(row, -v_pos * weight); } if (cell_offsets.count(var.cell->name)) { - es.add_rhs(row, -(yaxis ? cell_offsets.at(var.cell->name).second : cell_offsets.at(var.cell->name).first) * weight); + es.add_rhs(row, -(yaxis ? cell_offsets.at(var.cell->name).second + : cell_offsets.at(var.cell->name).first) * + weight); } }; @@ -465,33 +514,20 @@ class HeAPPlacer stamp_equation(port, *other, -weight); stamp_equation(*other, *other, weight); stamp_equation(*other, port, -weight); - -/* - if (port.cell->udata != -1) { - es.add_coeff(port.cell->udata, port.cell->udata, weight); - if (!cell_locs.at(other->cell->name).locked) - es.add_coeff(other->cell->udata, port.cell->udata, -weight); - } else { - // Add our fixed position to the other end's RHS - if (!cell_locs.at(other->cell->name).locked) - es.add_rhs(other->cell->udata, this_pos * weight); - } - // Opposite for the other end of the connection - if (!cell_locs.at(other->cell->name).locked) { - es.add_coeff(other->cell->udata, other->cell->udata, weight); - if (!cell_locs.at(port.cell->name).locked) - es.add_coeff(port.cell->udata, other->cell->udata, -weight); - } else { - // Add our fixed position to the other end's RHS - if (!cell_locs.at(port.cell->name).locked) - es.add_rhs(port.cell->udata, this_pos * weight); - } -*/ }; process_arc(lbport); process_arc(ubport); }); } + if (iter != -1) { + const float alpha = 0.3; + float weight = alpha * iter; + for (size_t row = 0; row < solve_cells.size(); row++) { + // Add an arc from legalised to current position + es.add_coeff(row, row, weight); + es.add_rhs(row, weight * cell_pos(solve_cells.at(row))); + } + } } // Build the system of equations for either X or Y @@ -499,15 +535,14 @@ class HeAPPlacer { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { 
return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; - build_equations(es, yaxis); std::vector vals; - std::transform(place_cells.begin(), place_cells.end(), std::back_inserter(vals), cell_pos); + std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos); es.solve(vals); for (size_t i = 0; i < vals.size(); i++) if (yaxis) - cell_locs.at(place_cells.at(i)->name).y = int(vals.at(i) + 0.5); + cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i) + 0.5))); else - cell_locs.at(place_cells.at(i)->name).x = int(vals.at(i) + 0.5); + cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i) + 0.5))); } // Compute HPWL @@ -534,6 +569,389 @@ class HeAPPlacer return hpwl; } + // Swap the Bel of a cell with another, return the original location + BelId swap_cell_bels(CellInfo *cell, BelId newBel) + { + BelId oldBel = cell->bel; + CellInfo *bound = ctx->getBoundBelCell(newBel); + if (bound != nullptr) + ctx->unbindBel(newBel); + ctx->unbindBel(oldBel); + ctx->bindBel(newBel, cell, STRENGTH_WEAK); + if (bound != nullptr) + ctx->bindBel(oldBel, bound, STRENGTH_WEAK); + return oldBel; + } + + // Placement legalisation + // Note that there are *two meanings* of legalisation in nextpnr placement + // The first kind, as in HeAP, simply ensures that there is no overlap (each Bel maps only to one cell) + // The second kind also ensures that validity rules (isValidBelForCell) are met, because there is no guarantee + // in nextpnr that Bels are freely swappable (indeed many a architectures Bel is a logic cell with complex + // validity rules for control sets, etc, rather than a CLB/tile as in a more conventional pack&place flow) + void legalise_placement_simple(bool require_validity = false) + { + // Unbind all cells placed in this solution + for (auto cell : sorted(ctx->cells)) { + CellInfo *ci = cell.second; + if (ci->udata != dont_solve && ci->bel != BelId()) + 
ctx->unbindBel(ci->bel); + } + + // At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using + // the simple greedy largest-macro-first approach. + std::priority_queue> remaining; + for (auto cell : solve_cells) { + remaining.emplace(chain_size[cell->name], cell->name); + } + + while (!remaining.empty()) { + auto top = remaining.top(); + remaining.pop(); + + CellInfo *ci = ctx->cells.at(top.second).get(); + // Was now placed, ignore + if (ci->bel != BelId()) + continue; + // log_info(" Legalising %s\n", top.second.c_str(ctx)); + int bt = std::get<0>(bel_types.at(ci->type)); + auto &fb = fast_bels.at(bt); + int radius = 0; + int iter = 0; + bool placed = false; + while (!placed) { + + int nx = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); + int ny = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); + + iter++; + if ((iter % (20 * (radius + 1))) == 0) + radius = std::min(std::max(max_x, max_y), radius + 1); + + if (nx < 0 || nx > max_x) + continue; + if (ny < 0 || ny > max_x) + continue; + + // ny = nearest_row_with_bel.at(bt).at(ny); + // nx = nearest_col_with_bel.at(bt).at(nx); + + if (nx >= int(fb.size())) + continue; + if (ny >= int(fb.at(nx).size())) + continue; + if (fb.at(nx).at(ny).empty()) + continue; + + if (ci->constr_children.empty()) { + for (auto sz : fb.at(nx).at(ny)) { + if (ctx->checkBelAvail(sz) || radius > (max_x / 4)) { + CellInfo *bound = ctx->getBoundBelCell(sz); + if (bound != nullptr) { + if (bound->constr_parent != nullptr || !bound->constr_children.empty()) + continue; + ctx->unbindBel(bound->bel); + remaining.emplace(chain_size[bound->name], bound->name); + } + ctx->bindBel(sz, ci, STRENGTH_WEAK); + if (require_validity && !ctx->isBelLocationValid(sz)) { + ctx->unbindBel(sz); + if (bound != nullptr) + ctx->bindBel(sz, bound, STRENGTH_WEAK); + } else { + Loc loc = ctx->getBelLocation(sz); + cell_locs[ci->name].x = loc.x; + cell_locs[ci->name].y = 
loc.y; + placed = true; + break; + } + } + } + } else { + // FIXME + NPNR_ASSERT(false); + } + } + } + } + + static constexpr float beta = 0.9; + + struct ChainExtent + { + int x0, y0, x1, y1; + }; + + struct LegaliserRegion + { + int id; + int x0, y0, x1, y1; + int cells, bels; + std::unordered_set included_chains; + bool overused() const + { + if (bels < 4) + return cells > bels; + else + return cells > beta * bels; + } + }; + + class CutLegaliser + { + public: + CutLegaliser(HeAPPlacer *p, IdString beltype) + : p(p), ctx(p->ctx), beltype(beltype), fb(p->fast_bels.at(std::get<0>(p->bel_types.at(beltype)))) + { + } + + void run() + { + init(); + find_overused_regions(); + expand_regions(); + for (auto &r : regions) { + if (!merged_regions.count(r.id)) + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels); + } + } + + private: + HeAPPlacer *p; + Context *ctx; + IdString beltype; + std::vector> occupancy; + std::vector> groups; + std::vector> chaines; + std::vector>> &fb; + + std::vector regions; + std::unordered_set merged_regions; + + int occ_at(int x, int y) { return occupancy.at(x).at(y); } + + int bels_at(int x, int y) + { + if (x >= int(fb.size()) || y >= int(fb.at(x).size())) + return 0; + return int(fb.at(x).at(y).size()); + } + + void init() + { + occupancy.resize(p->max_x + 1, std::vector(p->max_y + 1, 0)); + groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); + chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); + + for (int x = 0; x <= p->max_x; x++) + for (int y = 0; y <= p->max_y; y++) { + occupancy.at(x).at(y) = 0; + groups.at(x).at(y) = -1; + chaines.at(x).at(y) = {x, y, x, y}; + } + + std::map cr_extents; + + auto set_chain_ext = [&](IdString cell, int x, int y) { + if (!cr_extents.count(cell)) + cr_extents[cell] = {x, y, x, y}; + else { + cr_extents[cell].x0 = std::min(cr_extents[cell].x0, x); + cr_extents[cell].y0 = std::min(cr_extents[cell].y0, y); + cr_extents[cell].x1 = 
std::max(cr_extents[cell].x1, x); + cr_extents[cell].y1 = std::max(cr_extents[cell].y1, y); + } + }; + + for (auto &cell : p->cell_locs) { + if (ctx->cells.at(cell.first)->type == beltype) + occupancy.at(cell.second.x).at(cell.second.y)++; + // Compute ultimate extent of each chain root + if (p->chain_root.count(cell.first)) { + set_chain_ext(p->chain_root.at(cell.first)->name, cell.second.x, cell.second.y); + } else if (!ctx->cells.at(cell.first)->constr_children.empty()) { + set_chain_ext(cell.first, cell.second.x, cell.second.y); + } + } + for (auto &cell : p->cell_locs) { + // Transfer chain extents to the actual chaines structure + ChainExtent *ce = nullptr; + if (p->chain_root.count(cell.first)) + ce = &(cr_extents.at(p->chain_root.at(cell.first)->name)); + else if (!ctx->cells.at(cell.first)->constr_children.empty()) + ce = &(cr_extents.at(cell.first)); + if (ce) { + auto &lce = chaines.at(cell.second.x).at(cell.second.y); + lce.x0 = std::min(lce.x0, ce->x0); + lce.y0 = std::min(lce.y0, ce->y0); + lce.x1 = std::max(lce.x1, ce->x1); + lce.y1 = std::max(lce.y1, ce->y1); + } + } + } + void merge_regions(LegaliserRegion &merged, LegaliserRegion &mergee) + { + // Prevent grow_region from recursing while doing this + for (int x = mergee.x0; x <= mergee.x1; x++) + for (int y = mergee.y0; y <= mergee.y1; y++) { + // log_info("%d %d\n", groups.at(x).at(y), mergee.id); + NPNR_ASSERT(groups.at(x).at(y) == mergee.id); + groups.at(x).at(y) = merged.id; + merged.cells += occ_at(x, y); + merged.bels += bels_at(x, y); + } + merged_regions.insert(mergee.id); + grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1); + } + + void grow_region(LegaliserRegion &r, int x0, int y0, int x1, int y1, bool init = false) + { + // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1); + if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init) + return; + int old_x0 = r.x0 + (init ? 
1 : 0), old_y0 = r.y0, old_x1 = r.x1, old_y1 = r.y1; + r.x0 = std::min(r.x0, x0); + r.y0 = std::min(r.y0, y0); + r.x1 = std::max(r.x1, x1); + r.y1 = std::max(r.y1, y1); + + auto process_location = [&](int x, int y) { + // Merge with any overlapping regions + if (groups.at(x).at(y) != r.id) { + r.bels += bels_at(x, y); + r.cells += occ_at(x, y); + } + if (groups.at(x).at(y) != -1 && groups.at(x).at(y) != r.id) + merge_regions(r, regions.at(groups.at(x).at(y))); + groups.at(x).at(y) = r.id; + // Grow to cover any chains + auto &chaine = chaines.at(x).at(y); + grow_region(r, chaine.x0, chaine.y0, chaine.x1, chaine.y1); + }; + for (int x = r.x0; x < old_x0; x++) + for (int y = r.y0; y <= r.y1; y++) + process_location(x, y); + for (int x = old_x1 + 1; x <= x1; x++) + for (int y = r.y0; y <= r.y1; y++) + process_location(x, y); + for (int y = r.y0; y < old_y0; y++) + for (int x = r.x0; x <= r.x1; x++) + process_location(x, y); + for (int y = old_y1 + 1; y <= r.y1; y++) + for (int x = r.x0; x <= r.x1; x++) + process_location(x, y); + } + + void find_overused_regions() + { + for (int x = 0; x <= p->max_x; x++) + for (int y = 0; y <= p->max_y; y++) { + // Either already in a group, or not overutilised. 
Ignore + if (groups.at(x).at(y) != -1 || (occ_at(x, y) <= bels_at(x, y))) + continue; + // log_info("%d %d %d\n", x, y, occ_at(x, y)); + int id = int(regions.size()); + groups.at(x).at(y) = id; + LegaliserRegion reg; + reg.id = id; + reg.x0 = reg.x1 = x; + reg.y0 = reg.y1 = y; + reg.bels = bels_at(x, y); + reg.cells = occ_at(x, y); + // Make sure we cover carries, etc + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1, true); + + bool expanded = true; + while (expanded) { + expanded = false; + // Keep trying expansion in x and y, until we find no over-occupancy cells + // or hit grouped cells + + // First try expanding in x + if (reg.x1 < p->max_x) { + bool over_occ_x = false; + for (int y1 = reg.y0; y1 <= reg.y1; y1++) { + if (occ_at(reg.x1 + 1, y1) > bels_at(reg.x1 + 1, y1)) { + // log_info("(%d, %d) occ %d bels %d\n", reg.x1+ 1, y1, occ_at(reg.x1 + 1, y1), + // bels_at(reg.x1 + 1, y1)); + over_occ_x = true; + break; + } + } + if (over_occ_x) { + expanded = true; + grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); + } + } + + if (reg.y1 < p->max_y) { + bool over_occ_y = false; + for (int x1 = reg.x0; x1 <= reg.x1; x1++) { + if (occ_at(x1, reg.y1 + 1) > bels_at(x1, reg.y1 + 1)) { + // log_info("(%d, %d) occ %d bels %d\n", x1, reg.y1 + 1, occ_at(x1, reg.y1 + 1), + // bels_at(x1, reg.y1 + 1)); + over_occ_y = true; + break; + } + } + if (over_occ_y) { + expanded = true; + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); + } + } + } + regions.push_back(reg); + } + } + + void expand_regions() + { + std::queue<int> overu_regions; + for (auto &r : regions) { + if (!merged_regions.count(r.id) && r.overused()) + overu_regions.push(r.id); + } + while (!overu_regions.empty()) { + int rid = overu_regions.front(); + overu_regions.pop(); + if (merged_regions.count(rid)) + continue; + auto &reg = regions.at(rid); + while (reg.overused()) { + bool changed = false; + if (reg.x0 > 0) { + grow_region(reg, reg.x0 - 1, reg.y0, reg.x1, reg.y1); + changed = true; + if (!reg.overused()) 
+ break; + } + if (reg.x1 < p->max_x) { + grow_region(reg, reg.x0, reg.y0, reg.x1 + 1, reg.y1); + changed = true; + if (!reg.overused()) + break; + } + if (reg.y0 > 0) { + grow_region(reg, reg.x0, reg.y0 - 1, reg.x1, reg.y1); + changed = true; + if (!reg.overused()) + break; + } + if (reg.y1 < p->max_y) { + grow_region(reg, reg.x0, reg.y0, reg.x1, reg.y1 + 1); + changed = true; + if (!reg.overused()) + break; + } + if (!changed) + log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, reg.x1, + reg.y1, reg.cells, beltype.c_str(ctx)); + } + } + } + }; + typedef decltype(CellInfo::udata) cell_udata_t; cell_udata_t dont_solve = std::numeric_limits::max(); }; From 8a791e83097f6b6bd256e0412a475b9be0e79414 Mon Sep 17 00:00:00 2001 From: David Shah Date: Tue, 15 Jan 2019 15:20:38 +0000 Subject: [PATCH 26/59] HeAP: Cut finder for spreading Signed-off-by: David Shah --- common/placer_heap.cc | 148 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 130 insertions(+), 18 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 6cd0459d..6b6a6225 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -207,6 +207,7 @@ class HeAPPlacer struct CellLocation { int x, y; + double rawx, rawy; bool locked, global; }; std::unordered_map cell_locs; @@ -414,11 +415,11 @@ class HeAPPlacer for (auto child : cell->constr_children) { chain_size[root->name]++; if (child->constr_x != child->UNCONSTR) - cell_locs[child->name].x = base.x + child->constr_x; + cell_locs[child->name].x = std::min(max_x, base.x + child->constr_x); else cell_locs[child->name].x = base.x; // better handling of UNCONSTR? if (child->constr_y != child->UNCONSTR) - cell_locs[child->name].y = base.y + child->constr_y; + cell_locs[child->name].y = std::min(max_y, base.y + child->constr_y); else cell_locs[child->name].y = base.y; // better handling of UNCONSTR? 
chain_root[cell->name] = root; @@ -531,18 +532,20 @@ class HeAPPlacer } // Build the system of equations for either X or Y - void solve_equations(EquationSystem &es, bool yaxis) - { + void solve_equations(EquationSystem &es, bool yaxis) { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; std::vector vals; std::transform(solve_cells.begin(), solve_cells.end(), std::back_inserter(vals), cell_pos); es.solve(vals); for (size_t i = 0; i < vals.size(); i++) - if (yaxis) + if (yaxis) { + cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i); cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i) + 0.5))); - else + } else { + cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i); cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i) + 0.5))); + } } // Compute HPWL @@ -724,10 +727,15 @@ class HeAPPlacer std::vector> occupancy; std::vector> groups; std::vector> chaines; + std::map cell_extents; + std::vector>> &fb; std::vector regions; std::unordered_set merged_regions; + // Cells at a location, sorted by real (not integer) x and y + std::vector>> cells_at_location_sx; + std::vector>> cells_at_location_sy; int occ_at(int x, int y) { return occupancy.at(x).at(y); } @@ -738,12 +746,12 @@ class HeAPPlacer return int(fb.at(x).at(y).size()); } - void init() - { + void init() { occupancy.resize(p->max_x + 1, std::vector(p->max_y + 1, 0)); groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); - + cells_at_location_sx.resize(p->max_x + 1, std::vector>(p->max_y + 1)); + cells_at_location_sy.resize(p->max_x + 1, std::vector>(p->max_y + 1)); for (int x = 0; x <= p->max_x; x++) for (int y = 0; y <= p->max_y; y++) { occupancy.at(x).at(y) = 0; @@ -751,16 +759,15 @@ class HeAPPlacer chaines.at(x).at(y) = {x, y, x, y}; } - std::map cr_extents; 
auto set_chain_ext = [&](IdString cell, int x, int y) { - if (!cr_extents.count(cell)) - cr_extents[cell] = {x, y, x, y}; + if (!cell_extents.count(cell)) + cell_extents[cell] = {x, y, x, y}; else { - cr_extents[cell].x0 = std::min(cr_extents[cell].x0, x); - cr_extents[cell].y0 = std::min(cr_extents[cell].y0, y); - cr_extents[cell].x1 = std::max(cr_extents[cell].x1, x); - cr_extents[cell].y1 = std::max(cr_extents[cell].y1, y); + cell_extents[cell].x0 = std::min(cell_extents[cell].x0, x); + cell_extents[cell].y0 = std::min(cell_extents[cell].y0, y); + cell_extents[cell].x1 = std::max(cell_extents[cell].x1, x); + cell_extents[cell].y1 = std::max(cell_extents[cell].y1, y); } }; @@ -778,9 +785,9 @@ class HeAPPlacer // Transfer chain extents to the actual chaines structure ChainExtent *ce = nullptr; if (p->chain_root.count(cell.first)) - ce = &(cr_extents.at(p->chain_root.at(cell.first)->name)); + ce = &(cell_extents.at(p->chain_root.at(cell.first)->name)); else if (!ctx->cells.at(cell.first)->constr_children.empty()) - ce = &(cr_extents.at(cell.first)); + ce = &(cell_extents.at(cell.first)); if (ce) { auto &lce = chaines.at(cell.second.x).at(cell.second.y); lce.x0 = std::min(lce.x0, ce->x0); @@ -789,6 +796,20 @@ class HeAPPlacer lce.y1 = std::max(lce.y1, ce->y1); } } + for (auto cell : p->solve_cells) { + cells_at_location_sx.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); + cells_at_location_sy.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); + } + for (auto &col : cells_at_location_sx) + for (auto &loc : col) + std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b){ + return p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx; + }); + for (auto &col : cells_at_location_sy) + for (auto &loc : col) + std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b){ + return p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy; + }); } void 
merge_regions(LegaliserRegion &merged, LegaliserRegion &mergee) { @@ -950,6 +971,97 @@ class HeAPPlacer } } } + + // Implementation of the recursive cut-based spreading as described in the HeAP paper + // Note we use "left" to mean "-x/-y" depending on dir and "right" to mean "+x/+y" depending on dir + + std::vector cut_cells; + + void cut_region(LegaliserRegion &r, bool dir) { + cut_cells.clear(); + auto &cal = dir ? cells_at_location_sy : cells_at_location_sx; + for (int x = r.x0; x <= r.x1; x++) { + for (int y = r.y0; y <= r.y1; y++) { + std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); + } + } + // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut + int pivot_cells = 0; + int pivot = 0; + for (auto &cell : cut_cells) { + pivot_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; + if (pivot_cells >= r.cells / 2) + break; + pivot++; + } + // Find the clearance required either side of the pivot + int clearance_l = 0, clearance_r = 0; + for (size_t i = 0; i < cut_cells.size(); i++) { + int size; + if (cell_extents.count(cut_cells.at(i)->name)) { + auto &ce = cell_extents.at(cut_cells.at(i)->name); + size = dir ? (ce.y1 - ce.y0 + 1) : (ce.x1 - ce.x0 + 1); + } else { + size = 1; + } + if (i < pivot) + clearance_l = std::max(clearance_l, size); + else + clearance_r = std::max(clearance_r, size); + } + // Find the target cut that minimises difference in utilisation, whilst trying to ensure that all chains + // still fit + + // First trim the boundaries of the region in the axis-of-interest, skipping any rows/cols without any + // bels of the appropriate type + int trimmed_l = dir ? r.y0 : r.x0, trimmed_r = dir ? r.y1 : r.x1; + while (trimmed_l < (dir ? r.y1 : r.x1)) { + bool have_bels = false; + for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) + if (bels_at(dir ? i : trimmed_l, dir ? 
trimmed_l : i) > 0) { + have_bels = true; + break; + } + if (have_bels) + break; + trimmed_l++; + } + while (trimmed_r > (dir ? r.y0 : r.x0)) { + bool have_bels = false; + for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) + if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i) > 0) { + have_bels = true; + break; + } + if (have_bels) + break; + trimmed_r--; + } + + // Now find the initial target cut that minimises utilisation imbalance, whilst + // meeting the clearance requirements for any large macros + int left_cells = pivot_cells, right_cells = r.cells - pivot_cells; + int left_bels = 0, right_bels = r.bels; + int best_tgt_cut = -1; + double best_deltaU = std::numeric_limits::max(); + + for (int i = trimmed_l; i <= trimmed_r; i++) { + int slither_bels = 0; + for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) { + slither_bels += dir ? bels_at(j, i) : bels_at(i, j); + } + left_bels += slither_bels; + right_bels -= slither_bels; + if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) { + // Solution is potentially valid + double aU = std::abs(double(left_cells) / double(left_bels) - double(right_bels) / double(right_cells)); + if (aU < best_deltaU) { + best_deltaU = aU; + best_tgt_cut = i; + } + } + } + } }; typedef decltype(CellInfo::udata) cell_udata_t; From b483008cdf64645268326f8df10f5a0bcdb0c965 Mon Sep 17 00:00:00 2001 From: David Shah Date: Tue, 22 Jan 2019 15:16:00 +0000 Subject: [PATCH 27/59] HeAP: Cut peturbation, binning and intra-bin linear spreading Signed-off-by: David Shah --- common/placer_heap.cc | 151 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 132 insertions(+), 19 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 6b6a6225..2b6fc161 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include "log.h" #include "nextpnr.h" #include "place_common.h" @@ -532,7 +533,8 @@ class HeAPPlacer } // 
Build the system of equations for either X or Y - void solve_equations(EquationSystem &es, bool yaxis) { + void solve_equations(EquationSystem &es, bool yaxis) + { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; std::vector vals; @@ -714,9 +716,11 @@ class HeAPPlacer find_overused_regions(); expand_regions(); for (auto &r : regions) { - if (!merged_regions.count(r.id)) - log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, - r.bels); + if (merged_regions.count(r.id)) + continue; + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels); + cut_region(r, false); } } @@ -734,8 +738,8 @@ class HeAPPlacer std::vector regions; std::unordered_set merged_regions; // Cells at a location, sorted by real (not integer) x and y - std::vector>> cells_at_location_sx; - std::vector>> cells_at_location_sy; + std::vector>> cells_at_location_sx; + std::vector>> cells_at_location_sy; int occ_at(int x, int y) { return occupancy.at(x).at(y); } @@ -746,7 +750,8 @@ class HeAPPlacer return int(fb.at(x).at(y).size()); } - void init() { + void init() + { occupancy.resize(p->max_x + 1, std::vector(p->max_y + 1, 0)); groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); @@ -759,7 +764,6 @@ class HeAPPlacer chaines.at(x).at(y) = {x, y, x, y}; } - auto set_chain_ext = [&](IdString cell, int x, int y) { if (!cell_extents.count(cell)) cell_extents[cell] = {x, y, x, y}; @@ -797,17 +801,21 @@ class HeAPPlacer } } for (auto cell : p->solve_cells) { - cells_at_location_sx.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); - cells_at_location_sy.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); + cells_at_location_sx.at(p->cell_locs.at(cell->name).x) + 
.at(p->cell_locs.at(cell->name).y) + .push_back(cell); + cells_at_location_sy.at(p->cell_locs.at(cell->name).x) + .at(p->cell_locs.at(cell->name).y) + .push_back(cell); } for (auto &col : cells_at_location_sx) for (auto &loc : col) - std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b){ + std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b) { return p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx; }); for (auto &col : cells_at_location_sy) for (auto &loc : col) - std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b){ + std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b) { return p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy; }); } @@ -977,7 +985,8 @@ class HeAPPlacer std::vector cut_cells; - void cut_region(LegaliserRegion &r, bool dir) { + boost::optional> cut_region(LegaliserRegion &r, bool dir) + { cut_cells.clear(); auto &cal = dir ? cells_at_location_sy : cells_at_location_sx; for (int x = r.x0; x <= r.x1; x++) { @@ -994,6 +1003,8 @@ class HeAPPlacer break; pivot++; } + log_info("orig pivot %d lc %d rc %d\n", pivot, pivot_cells, r.cells - pivot_cells); + // Find the clearance required either side of the pivot int clearance_l = 0, clearance_r = 0; for (size_t i = 0; i < cut_cells.size(); i++) { @@ -1004,7 +1015,7 @@ class HeAPPlacer } else { size = 1; } - if (i < pivot) + if (int(i) < pivot) clearance_l = std::max(clearance_l, size); else clearance_r = std::max(clearance_r, size); @@ -1029,7 +1040,7 @@ class HeAPPlacer while (trimmed_r > (dir ? r.y0 : r.x0)) { bool have_bels = false; for (int i = dir ? r.x0 : r.y0; i <= (dir ? r.x1 : r.y1); i++) - if (bels_at(dir ? i : trimmed_l, dir ? trimmed_l : i) > 0) { + if (bels_at(dir ? i : trimmed_r, dir ? 
trimmed_r : i) > 0) { have_bels = true; break; } @@ -1037,14 +1048,16 @@ class HeAPPlacer break; trimmed_r--; } - + log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); + if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r)) + return {}; // Now find the initial target cut that minimises utilisation imbalance, whilst // meeting the clearance requirements for any large macros int left_cells = pivot_cells, right_cells = r.cells - pivot_cells; int left_bels = 0, right_bels = r.bels; int best_tgt_cut = -1; double best_deltaU = std::numeric_limits::max(); - + std::pair target_cut_bels; for (int i = trimmed_l; i <= trimmed_r; i++) { int slither_bels = 0; for (int j = dir ? r.x0 : r.y0; j <= (dir ? r.x1 : r.y1); j++) { @@ -1054,14 +1067,114 @@ class HeAPPlacer right_bels -= slither_bels; if (((i - trimmed_l) + 1) >= clearance_l && ((trimmed_r - i) + 1) >= clearance_r) { // Solution is potentially valid - double aU = std::abs(double(left_cells) / double(left_bels) - double(right_bels) / double(right_cells)); + double aU = + std::abs(double(left_cells) / double(left_bels) - double(right_cells) / double(right_bels)); if (aU < best_deltaU) { best_deltaU = aU; best_tgt_cut = i; + target_cut_bels = std::make_pair(left_bels, right_bels); } } } - } + NPNR_ASSERT(best_tgt_cut != -1); + left_bels = target_cut_bels.first; + right_bels = target_cut_bels.second; + log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, right_cells, right_bels); + + // Peturb the source cut to eliminate overutilisation + while (pivot > 0 && (left_cells > left_bels)) { + auto &move_cell = cut_cells.at(pivot); + int size = p->chain_size.count(move_cell->name) ? 
p->chain_size.at(move_cell->name) : 1; + left_cells -= size; + right_cells += size; + pivot--; + } + while (pivot < (int(cut_cells.size()) - 1) && (right_cells > right_bels)) { + auto &move_cell = cut_cells.at(pivot + 1); + int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; + left_cells += size; + right_cells -= size; + pivot++; + } + log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells, right_bels); + // Split regions into bins, and then spread cells by linear interpolation within those bins + auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) { + int N = 1 + cells_end - cells_start; + // Split region into up to 10 (K) bins + int K = std::min(N, 10); + std::vector> bin_bounds; // [start, end] + bin_bounds.emplace_back(cells_start, area_l); + for (int i = 1; i < K; i++) + bin_bounds.emplace_back(cells_start + (N * i) / K, + area_l + ((area_r - area_l + 0.4) * i) / K); + bin_bounds.emplace_back(cells_end, area_r + 0.4); + + for (int i = 0; i < K; i++) { + auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1); + double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy + : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx; + double orig_right = dir ? p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy + : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx; + double m = (br.second - bl.second) / (orig_right - orig_left); + for (int j = bl.first; j < br.first; j++) { + auto &pos = dir ? 
p->cell_locs.at(cut_cells.at(j)->name).rawy + : p->cell_locs.at(cut_cells.at(j)->name).rawx; + pos = bl.second + m * (pos - orig_left); + log_info("spread pos %f\n", pos); + } + } + }; + spread_binlerp(0, pivot + 1, trimmed_l, best_tgt_cut); + spread_binlerp(pivot + 1, int(cut_cells.size()), best_tgt_cut + 1, trimmed_r); + // Update various data structures + for (int x = r.x0; x <= r.x1; x++) + for (int y = r.y0; y <= r.y1; y++) { + cells_at_location_sx.at(x).at(y).clear(); + cells_at_location_sy.at(x).at(y).clear(); + } + for (auto cell : cut_cells) { + auto &cl = p->cell_locs.at(cell->name); + cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx + 0.5))); + cl.y = std::min(r.y1, std::max(r.y1, int(cl.rawy + 0.5))); + cells_at_location_sx.at(cl.x).at(cl.y).push_back(cell); + cells_at_location_sy.at(cl.x).at(cl.y).push_back(cell); + } + for (int x = r.x0; x <= r.x1; x++) + for (int y = r.y0; y <= r.y1; y++) { + auto &sx = cells_at_location_sx.at(x).at(y); + std::sort(sx.begin(), sx.end(), [&](const CellInfo *a, const CellInfo *b) { + return p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx; + }); + auto &sy = cells_at_location_sy.at(x).at(y); + std::sort(sy.begin(), sy.end(), [&](const CellInfo *a, const CellInfo *b) { + return p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy; + }); + } + LegaliserRegion rl, rr; + rl.id = int(regions.size()); + rl.x0 = r.x0; + rl.y0 = r.y0; + rl.x1 = dir ? r.x1 : best_tgt_cut; + rl.y1 = dir ? best_tgt_cut : r.y1; + rl.cells = left_cells; + rl.bels = left_bels; + rr.id = int(regions.size()) + 1; + rr.x0 = dir ? r.x0 : (best_tgt_cut + 1); + rr.y0 = dir ? 
(best_tgt_cut + 1) : r.y0; + rr.x1 = r.x1; + rr.y1 = r.y1; + rr.cells = right_cells; + rr.bels = right_bels; + regions.push_back(rl); + regions.push_back(rr); + for (int x = rl.x0; x <= rl.x1; x++) + for (int y = rl.y0; y <= rl.y1; y++) + groups.at(x).at(y) = rl.id; + for (int x = rr.x0; x <= rr.x1; x++) + for (int y = rr.y0; y <= rr.y1; y++) + groups.at(x).at(y) = rr.id; + return std::make_pair(rl.id, rr.id); + }; }; typedef decltype(CellInfo::udata) cell_udata_t; From 030b02588b9edcfbfd4de6ee44a2bc84220846e3 Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 23 Jan 2019 14:25:34 +0000 Subject: [PATCH 28/59] HeAP: Make cut-based spreading recursive Signed-off-by: David Shah --- common/placer_heap.cc | 73 +++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 2b6fc161..a8013e24 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -144,14 +144,14 @@ class HeAPPlacer } // legalise_with_cuts(true); - CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); - NPNR_ASSERT(false); + // CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); + //NPNR_ASSERT(false); bool valid = false; - wirelen_t solved_hpwl = 0, legal_hpwl = 1, best_hpwl = std::numeric_limits::max(); + wirelen_t solved_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); int iter = 0, stalled = 0; while (!valid || (stalled < 5 && (solved_hpwl < legal_hpwl * 0.8))) { - if ((solved_hpwl < legal_hpwl * 0.8) || (stalled > 5)) { + if (!valid && ((solved_hpwl > legal_hpwl * 0.8) || (stalled > 5))) { stalled = 0; best_hpwl = std::numeric_limits::max(); valid = true; @@ -171,11 +171,15 @@ class HeAPPlacer log_info("Solved HPWL = %d\n", int(solved_hpwl)); update_all_chains(); + CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); + update_all_chains(); + legal_hpwl = total_hpwl(); + log_info("Spread HPWL = %d\n", int(legal_hpwl)); legalise_placement_simple(valid); update_all_chains(); legal_hpwl = 
total_hpwl(); - log_info("Legalised HPWL = %d\n", int(legal_hpwl)); + log_info("Legalised HPWL = %d (%s)\n", int(legal_hpwl), valid ? "valid" : "invalid"); if (legal_hpwl < best_hpwl) { best_hpwl = legal_hpwl; stalled = 0; @@ -715,12 +719,26 @@ class HeAPPlacer init(); find_overused_regions(); expand_regions(); + std::queue> workqueue; for (auto &r : regions) { if (merged_regions.count(r.id)) continue; - log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, - r.bels); - cut_region(r, false); + /*log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels);*/ + workqueue.emplace(r.id, false); + //cut_region(r, false); + } + while (!workqueue.empty()) { + auto front = workqueue.front(); + workqueue.pop(); + auto &r = regions.at(front.first); + /*log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels);*/ + auto res = cut_region(r, front.second); + if (res) { + workqueue.emplace(res->first, !front.second); + workqueue.emplace(res->second, !front.second); + } } } @@ -989,11 +1007,22 @@ class HeAPPlacer { cut_cells.clear(); auto &cal = dir ? 
cells_at_location_sy : cells_at_location_sx; - for (int x = r.x0; x <= r.x1; x++) { + if (dir) { for (int y = r.y0; y <= r.y1; y++) { - std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); + for (int x = r.x0; x <= r.x1; x++) { + //log_info("%d\n", int(cal.at(x).at(y).size())); + std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); + } + } + } else { + for (int x = r.x0; x <= r.x1; x++) { + for (int y = r.y0; y <= r.y1; y++) { + std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); + } } } + if (cut_cells.empty()) + return {}; // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut int pivot_cells = 0; int pivot = 0; @@ -1003,7 +1032,9 @@ class HeAPPlacer break; pivot++; } - log_info("orig pivot %d lc %d rc %d\n", pivot, pivot_cells, r.cells - pivot_cells); + if (pivot == int(cut_cells.size())) + pivot = int(cut_cells.size()) - 1; + //log_info("orig pivot %d lc %d rc %d\n", pivot, pivot_cells, r.cells - pivot_cells); // Find the clearance required either side of the pivot int clearance_l = 0, clearance_r = 0; @@ -1048,7 +1079,7 @@ class HeAPPlacer break; trimmed_r--; } - log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); + //log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r)) return {}; // Now find the initial target cut that minimises utilisation imbalance, whilst @@ -1079,7 +1110,7 @@ class HeAPPlacer NPNR_ASSERT(best_tgt_cut != -1); left_bels = target_cut_bels.first; right_bels = target_cut_bels.second; - log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, right_cells, right_bels); + //log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, right_cells, right_bels); 
// Peturb the source cut to eliminate overutilisation while (pivot > 0 && (left_cells > left_bels)) { @@ -1096,10 +1127,18 @@ class HeAPPlacer right_cells -= size; pivot++; } - log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells, right_bels); + //log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells, right_bels); // Split regions into bins, and then spread cells by linear interpolation within those bins auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) { - int N = 1 + cells_end - cells_start; + int N = cells_end - cells_start; + if (N <= 2) { + for (int i = cells_start; i < cells_end; i++) { + auto &pos = dir ? p->cell_locs.at(cut_cells.at(i)->name).rawy + : p->cell_locs.at(cut_cells.at(i)->name).rawx; + pos = area_l + i * ((area_r - area_l) / N); + } + return; + } // Split region into up to 10 (K) bins int K = std::min(N, 10); std::vector> bin_bounds; // [start, end] @@ -1120,7 +1159,6 @@ class HeAPPlacer auto &pos = dir ? 
p->cell_locs.at(cut_cells.at(j)->name).rawy : p->cell_locs.at(cut_cells.at(j)->name).rawx; pos = bl.second + m * (pos - orig_left); - log_info("spread pos %f\n", pos); } } }; @@ -1135,9 +1173,10 @@ class HeAPPlacer for (auto cell : cut_cells) { auto &cl = p->cell_locs.at(cell->name); cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx + 0.5))); - cl.y = std::min(r.y1, std::max(r.y1, int(cl.rawy + 0.5))); + cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy + 0.5))); cells_at_location_sx.at(cl.x).at(cl.y).push_back(cell); cells_at_location_sy.at(cl.x).at(cl.y).push_back(cell); + //log_info("spread pos %d %d\n", cl.x, cl.y); } for (int x = r.x0; x <= r.x1; x++) for (int y = r.y0; y <= r.y1; y++) { From 0570cb7ae99314536878f85280143212f1c1bfab Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 23 Jan 2019 15:02:49 +0000 Subject: [PATCH 29/59] HeAP: Spreading working acceptably Signed-off-by: David Shah --- common/placer_heap.cc | 115 +++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 64 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index a8013e24..3e98b937 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include "log.h" #include "nextpnr.h" #include "place_common.h" @@ -147,10 +148,10 @@ class HeAPPlacer // CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); //NPNR_ASSERT(false); - bool valid = false; + bool valid = true; wirelen_t solved_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); int iter = 0, stalled = 0; - while (!valid || (stalled < 5 && (solved_hpwl < legal_hpwl * 0.8))) { + while (!valid || (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8))) { if (!valid && ((solved_hpwl > legal_hpwl * 0.8) || (stalled > 5))) { stalled = 0; best_hpwl = std::numeric_limits::max(); @@ -511,7 +512,7 @@ class HeAPPlacer int o_pos = cell_pos(other->cell); // if (o_pos == this_pos) // return; // FIXME: or clamp to 1? - double weight = 1. 
/ (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); + double weight = 1.0 / (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); // FIXME: add criticality to weighting // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, @@ -526,7 +527,7 @@ class HeAPPlacer }); } if (iter != -1) { - const float alpha = 0.3; + const float alpha = 0.1; float weight = alpha * iter; for (size_t row = 0; row < solve_cells.size(); row++) { // Add an arc from legalised to current position @@ -547,10 +548,10 @@ class HeAPPlacer for (size_t i = 0; i < vals.size(); i++) if (yaxis) { cell_locs.at(solve_cells.at(i)->name).rawy = vals.at(i); - cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i) + 0.5))); + cell_locs.at(solve_cells.at(i)->name).y = std::min(max_y, std::max(0, int(vals.at(i)))); } else { cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i); - cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i) + 0.5))); + cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i)))); } } @@ -631,7 +632,7 @@ class HeAPPlacer while (!placed) { int nx = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); - int ny = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); + int ny = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).y - radius, 0); iter++; if ((iter % (20 * (radius + 1))) == 0) @@ -713,13 +714,17 @@ class HeAPPlacer : p(p), ctx(p->ctx), beltype(beltype), fb(p->fast_bels.at(std::get<0>(p->bel_types.at(beltype)))) { } - + static int seq; void run() { init(); find_overused_regions(); expand_regions(); std::queue> workqueue; + std::vector> orig; + if (ctx->debug) + for (auto c : p->solve_cells) + orig.emplace_back(p->cell_locs[c->name].rawx, p->cell_locs[c->name].rawy); for (auto &r : regions) { if (merged_regions.count(r.id)) continue; @@ -732,14 +737,23 @@ class HeAPPlacer auto 
front = workqueue.front(); workqueue.pop(); auto &r = regions.at(front.first); - /*log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, - r.bels);*/ + //log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + //r.bels); auto res = cut_region(r, front.second); if (res) { workqueue.emplace(res->first, !front.second); workqueue.emplace(res->second, !front.second); } } + if (ctx->debug) { + std::ofstream sp("spread" + std::to_string(seq) + ".csv"); + for (size_t i = 0; i < p->solve_cells.size(); i++) { + auto &c = p->solve_cells.at(i); + sp << orig.at(i).first << "," << orig.at(i).second << "," << p->cell_locs[c->name].rawx << "," << p->cell_locs[c->name].rawy << std::endl; + } + + ++seq; + } } private: @@ -756,8 +770,7 @@ class HeAPPlacer std::vector regions; std::unordered_set merged_regions; // Cells at a location, sorted by real (not integer) x and y - std::vector>> cells_at_location_sx; - std::vector>> cells_at_location_sy; + std::vector>> cells_at_location; int occ_at(int x, int y) { return occupancy.at(x).at(y); } @@ -773,8 +786,7 @@ class HeAPPlacer occupancy.resize(p->max_x + 1, std::vector(p->max_y + 1, 0)); groups.resize(p->max_x + 1, std::vector(p->max_y + 1, -1)); chaines.resize(p->max_x + 1, std::vector(p->max_y + 1)); - cells_at_location_sx.resize(p->max_x + 1, std::vector>(p->max_y + 1)); - cells_at_location_sy.resize(p->max_x + 1, std::vector>(p->max_y + 1)); + cells_at_location.resize(p->max_x + 1, std::vector>(p->max_y + 1)); for (int x = 0; x <= p->max_x; x++) for (int y = 0; y <= p->max_y; y++) { occupancy.at(x).at(y) = 0; @@ -819,23 +831,11 @@ class HeAPPlacer } } for (auto cell : p->solve_cells) { - cells_at_location_sx.at(p->cell_locs.at(cell->name).x) - .at(p->cell_locs.at(cell->name).y) - .push_back(cell); - cells_at_location_sy.at(p->cell_locs.at(cell->name).x) + cells_at_location.at(p->cell_locs.at(cell->name).x) .at(p->cell_locs.at(cell->name).y) 
.push_back(cell); } - for (auto &col : cells_at_location_sx) - for (auto &loc : col) - std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b) { - return p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx; - }); - for (auto &col : cells_at_location_sy) - for (auto &loc : col) - std::sort(loc.begin(), loc.end(), [&](const CellInfo *a, const CellInfo *b) { - return p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy; - }); + } void merge_regions(LegaliserRegion &merged, LegaliserRegion &mergee) { @@ -1006,21 +1006,18 @@ class HeAPPlacer boost::optional> cut_region(LegaliserRegion &r, bool dir) { cut_cells.clear(); - auto &cal = dir ? cells_at_location_sy : cells_at_location_sx; - if (dir) { + auto &cal = cells_at_location; + + for (int x = r.x0; x <= r.x1; x++) { for (int y = r.y0; y <= r.y1; y++) { - for (int x = r.x0; x <= r.x1; x++) { - //log_info("%d\n", int(cal.at(x).at(y).size())); - std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); - } - } - } else { - for (int x = r.x0; x <= r.x1; x++) { - for (int y = r.y0; y <= r.y1; y++) { - std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); - } + std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); } } + + std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) { + return dir ? 
(p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy) : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx); + }); + if (cut_cells.empty()) return {}; // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut @@ -1113,14 +1110,14 @@ class HeAPPlacer //log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, right_cells, right_bels); // Peturb the source cut to eliminate overutilisation - while (pivot > 0 && (left_cells > left_bels)) { + while (pivot > 0 && (double(left_cells) / double(left_bels) > double(right_cells) / double(right_bels))) { auto &move_cell = cut_cells.at(pivot); int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; left_cells -= size; right_cells += size; pivot--; } - while (pivot < (int(cut_cells.size()) - 1) && (right_cells > right_bels)) { + while (pivot < int(cut_cells.size()) - 1 && (double(left_cells) / double(left_bels) < double(right_cells) / double(right_bels))) { auto &move_cell = cut_cells.at(pivot + 1); int size = p->chain_size.count(move_cell->name) ? p->chain_size.at(move_cell->name) : 1; left_cells += size; @@ -1145,19 +1142,22 @@ class HeAPPlacer bin_bounds.emplace_back(cells_start, area_l); for (int i = 1; i < K; i++) bin_bounds.emplace_back(cells_start + (N * i) / K, - area_l + ((area_r - area_l + 0.4) * i) / K); - bin_bounds.emplace_back(cells_end, area_r + 0.4); - + area_l + ((area_r - area_l + 0.9) * i) / K); + bin_bounds.emplace_back(cells_end, area_r + 0.9); + //log("bins "); + //for (auto b : bin_bounds) log("%d, %.01f; ", b.first, b.second); + //log("\n"); for (int i = 0; i < K; i++) { auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1); double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx; double orig_right = dir ? 
p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx; - double m = (br.second - bl.second) / (orig_right - orig_left); + double m = (br.second - bl.second) / (1 + orig_right - orig_left); for (int j = bl.first; j < br.first; j++) { auto &pos = dir ? p->cell_locs.at(cut_cells.at(j)->name).rawy : p->cell_locs.at(cut_cells.at(j)->name).rawx; + NPNR_ASSERT(pos >= orig_left && pos <= orig_right); pos = bl.second + m * (pos - orig_left); } } @@ -1167,28 +1167,15 @@ class HeAPPlacer // Update various data structures for (int x = r.x0; x <= r.x1; x++) for (int y = r.y0; y <= r.y1; y++) { - cells_at_location_sx.at(x).at(y).clear(); - cells_at_location_sy.at(x).at(y).clear(); + cells_at_location.at(x).at(y).clear(); } for (auto cell : cut_cells) { auto &cl = p->cell_locs.at(cell->name); - cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx + 0.5))); - cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy + 0.5))); - cells_at_location_sx.at(cl.x).at(cl.y).push_back(cell); - cells_at_location_sy.at(cl.x).at(cl.y).push_back(cell); + cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx))); + cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy))); + cells_at_location.at(cl.x).at(cl.y).push_back(cell); //log_info("spread pos %d %d\n", cl.x, cl.y); } - for (int x = r.x0; x <= r.x1; x++) - for (int y = r.y0; y <= r.y1; y++) { - auto &sx = cells_at_location_sx.at(x).at(y); - std::sort(sx.begin(), sx.end(), [&](const CellInfo *a, const CellInfo *b) { - return p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx; - }); - auto &sy = cells_at_location_sy.at(x).at(y); - std::sort(sy.begin(), sy.end(), [&](const CellInfo *a, const CellInfo *b) { - return p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy; - }); - } LegaliserRegion rl, rr; rl.id = int(regions.size()); rl.x0 = r.x0; @@ -1215,10 +1202,10 @@ class HeAPPlacer return std::make_pair(rl.id, rr.id); }; }; - typedef decltype(CellInfo::udata) cell_udata_t; cell_udata_t 
dont_solve = std::numeric_limits::max(); }; +int HeAPPlacer::CutLegaliser::seq = 0; bool placer_heap(Context *ctx) { return HeAPPlacer(ctx).place(); } From f3d9b453876e02da94c0534d732a35a04e4e58f0 Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 23 Jan 2019 16:36:53 +0000 Subject: [PATCH 30/59] HeAP: Add SA-based iterative refinement after AP Signed-off-by: David Shah --- common/placer1.cc | 171 +++++++++++++++++++++++++----------------- common/placer1.h | 1 + common/placer_heap.cc | 62 +++++++++------ 3 files changed, 145 insertions(+), 89 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 767dbae6..ffa3aa75 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -147,85 +147,102 @@ class SAPlacer net.second->udata = old_udata[net.second->udata]; } - bool place() + bool place(bool refine = false) { log_break(); ctx->lock(); size_t placed_cells = 0; - // Initial constraints placer - for (auto &cell_entry : ctx->cells) { - CellInfo *cell = cell_entry.second.get(); - auto loc = cell->attrs.find(ctx->id("BEL")); - if (loc != cell->attrs.end()) { - std::string loc_name = loc->second; - BelId bel = ctx->getBelByName(ctx->id(loc_name)); - if (bel == BelId()) { - log_error("No Bel named \'%s\' located for " - "this chip (processing BEL attribute on \'%s\')\n", - loc_name.c_str(), cell->name.c_str(ctx)); - } - - IdString bel_type = ctx->getBelType(bel); - if (bel_type != cell->type) { - log_error("Bel \'%s\' of type \'%s\' does not match cell " - "\'%s\' of type \'%s\'\n", - loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); - } - if (!ctx->isValidBelForCell(cell, bel)) { - log_error("Bel \'%s\' of type \'%s\' is not valid for cell " - "\'%s\' of type \'%s\'\n", - loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); - } - - auto bound_cell = ctx->getBoundBelCell(bel); - if (bound_cell) { - log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell 
\'%s\'\n", - cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); - } - - ctx->bindBel(bel, cell, STRENGTH_USER); - locked_bels.insert(bel); - placed_cells++; - } - } - int constr_placed_cells = placed_cells; - log_info("Placed %d cells based on constraints.\n", int(placed_cells)); - ctx->yield(); - - // Sort to-place cells for deterministic initial placement std::vector autoplaced; std::vector chain_basis; + if (!refine) { + // Initial constraints placer + for (auto &cell_entry : ctx->cells) { + CellInfo *cell = cell_entry.second.get(); + auto loc = cell->attrs.find(ctx->id("BEL")); + if (loc != cell->attrs.end()) { + std::string loc_name = loc->second; + BelId bel = ctx->getBelByName(ctx->id(loc_name)); + if (bel == BelId()) { + log_error("No Bel named \'%s\' located for " + "this chip (processing BEL attribute on \'%s\')\n", + loc_name.c_str(), cell->name.c_str(ctx)); + } - for (auto &cell : ctx->cells) { - CellInfo *ci = cell.second.get(); - if (ci->bel == BelId()) { - autoplaced.push_back(cell.second.get()); + IdString bel_type = ctx->getBelType(bel); + if (bel_type != cell->type) { + log_error("Bel \'%s\' of type \'%s\' does not match cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + if (!ctx->isValidBelForCell(cell, bel)) { + log_error("Bel \'%s\' of type \'%s\' is not valid for cell " + "\'%s\' of type \'%s\'\n", + loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx)); + } + + auto bound_cell = ctx->getBoundBelCell(bel); + if (bound_cell) { + log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", + cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); + } + + ctx->bindBel(bel, cell, STRENGTH_USER); + locked_bels.insert(bel); + placed_cells++; + } } - } - std::sort(autoplaced.begin(), autoplaced.end(), [](CellInfo *a, CellInfo *b) { return a->name < b->name; }); - 
ctx->shuffle(autoplaced); - auto iplace_start = std::chrono::high_resolution_clock::now(); - // Place cells randomly initially - log_info("Creating initial placement for remaining %d cells.\n", int(autoplaced.size())); + int constr_placed_cells = placed_cells; + log_info("Placed %d cells based on constraints.\n", int(placed_cells)); + ctx->yield(); - for (auto cell : autoplaced) { - place_initial(cell); - placed_cells++; - if ((placed_cells - constr_placed_cells) % 500 == 0) + // Sort to-place cells for deterministic initial placement + + + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->bel == BelId()) { + autoplaced.push_back(cell.second.get()); + } + } + std::sort(autoplaced.begin(), autoplaced.end(), [](CellInfo *a, CellInfo *b) { return a->name < b->name; }); + ctx->shuffle(autoplaced); + auto iplace_start = std::chrono::high_resolution_clock::now(); + // Place cells randomly initially + log_info("Creating initial placement for remaining %d cells.\n", int(autoplaced.size())); + + for (auto cell : autoplaced) { + place_initial(cell); + placed_cells++; + if ((placed_cells - constr_placed_cells) % 500 == 0) + log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), + int(autoplaced.size())); + } + if ((placed_cells - constr_placed_cells) % 500 != 0) log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), int(autoplaced.size())); + if (cfg.budgetBased && ctx->slack_redist_iter > 0) + assign_budget(ctx); + ctx->yield(); + auto iplace_end = std::chrono::high_resolution_clock::now(); + log_info("Initial placement time %.02fs\n", std::chrono::duration(iplace_end - iplace_start).count()); + log_info("Running simulated annealing placer.\n"); + } else { + for (auto &cell : ctx->cells) { + CellInfo *ci = cell.second.get(); + if (ci->belStrength > STRENGTH_STRONG) + continue; + else if (ci->constr_parent != nullptr) + continue; + else if (!ci->constr_children.empty() || 
ci->constr_z != ci->UNCONSTR) + chain_basis.push_back(ci); + else + autoplaced.push_back(ci); + } + require_legal = false; + diameter = 3; } - if ((placed_cells - constr_placed_cells) % 500 != 0) - log_info(" initial placement placed %d/%d cells\n", int(placed_cells - constr_placed_cells), - int(autoplaced.size())); - if (cfg.budgetBased && ctx->slack_redist_iter > 0) - assign_budget(ctx); - ctx->yield(); - auto iplace_end = std::chrono::high_resolution_clock::now(); - log_info("Initial placement time %.02fs\n", std::chrono::duration(iplace_end - iplace_start).count()); auto saplace_start = std::chrono::high_resolution_clock::now(); - log_info("Running simulated annealing placer.\n"); // Invoke timing analysis to obtain criticalities if (!cfg.budgetBased) @@ -242,7 +259,7 @@ class SAPlacer wirelen_t min_wirelen = curr_wirelen_cost; int n_no_progress = 0; - temp = cfg.startTemp; + temp = refine ? 1e-8 : cfg.startTemp; // Main simulated annealing loop for (int iter = 1;; iter++) { @@ -284,7 +301,7 @@ class SAPlacer else n_no_progress++; - if (temp <= 1e-7 && n_no_progress >= 5) { + if (temp <= 1e-7 && n_no_progress >= (refine ? 
1 : 5)) { log_info(" at iteration #%d: temp = %f, timing cost = " "%.0f, wirelen = %.0f \n", iter, temp, double(curr_timing_cost), double(curr_wirelen_cost)); @@ -934,4 +951,24 @@ bool placer1(Context *ctx, Placer1Cfg cfg) } } +bool placer1_refine(Context *ctx, Placer1Cfg cfg) { + try { + SAPlacer placer(ctx, cfg); + placer.place(true); + log_info("Checksum: 0x%08x\n", ctx->checksum()); +#ifndef NDEBUG + ctx->lock(); + ctx->check(); + ctx->unlock(); +#endif + return true; + } catch (log_execution_error_exception) { +#ifndef NDEBUG + ctx->check(); +#endif + return false; + } +} + + NEXTPNR_NAMESPACE_END diff --git a/common/placer1.h b/common/placer1.h index a0eabbb0..4c7c7339 100644 --- a/common/placer1.h +++ b/common/placer1.h @@ -35,6 +35,7 @@ struct Placer1Cfg : public Settings }; extern bool placer1(Context *ctx, Placer1Cfg cfg); +extern bool placer1_refine(Context *ctx, Placer1Cfg cfg); NEXTPNR_NAMESPACE_END diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 3e98b937..7e8323ca 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -34,6 +34,7 @@ #include "nextpnr.h" #include "place_common.h" #include "placer_math.h" +#include "placer1.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -191,6 +192,9 @@ class HeAPPlacer ++iter; } ctx->unlock(); + + placer1_refine(ctx, Placer1Cfg(ctx)); + return true; } @@ -355,14 +359,17 @@ class HeAPPlacer // FIXME: Are there better approaches to the initial placement (e.g. greedy?) 
void seed_placement() { - std::unordered_map> available_bels; + std::unordered_map> available_bels; for (auto bel : ctx->getBels()) { if (!ctx->checkBelAvail(bel)) continue; available_bels[ctx->getBelType(bel)].push_back(bel); } - for (auto &ab : available_bels) - ctx->shuffle(ab.second); + for (auto &t : available_bels) { + std::random_shuffle(t.second.begin(), t.second.end(), [&](size_t n){ + return ctx->rng(int(n)); + }); + } for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; if (ci->bel != BelId()) { @@ -372,23 +379,34 @@ class HeAPPlacer cell_locs[cell.first].locked = true; cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel); } else if (ci->constr_parent == nullptr) { - if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty()) - log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx), - ci->type.c_str(ctx)); - BelId bel = available_bels.at(ci->type).back(); - available_bels.at(ci->type).pop_back(); - Loc loc = ctx->getBelLocation(bel); - cell_locs[cell.first].x = loc.x; - cell_locs[cell.first].y = loc.y; - cell_locs[cell.first].locked = false; - cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); - // FIXME - if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO")) { - place_cells.push_back(ci); - } else { - ctx->bindBel(bel, ci, STRENGTH_STRONG); - cell_locs[cell.first].locked = true; + bool placed = false; + while (!placed) { + if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty()) + log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx), + ci->type.c_str(ctx)); + BelId bel = available_bels.at(ci->type).back(); + available_bels.at(ci->type).pop_back(); + Loc loc = ctx->getBelLocation(bel); + cell_locs[cell.first].x = loc.x; + cell_locs[cell.first].y = loc.y; + cell_locs[cell.first].locked = false; + cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); + // FIXME + if 
(has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO")) { + place_cells.push_back(ci); + placed = true; + } else { + if (ctx->isValidBelForCell(ci, bel)) { + ctx->bindBel(bel, ci, STRENGTH_STRONG); + cell_locs[cell.first].locked = true; + placed = true; + } else { + available_bels.at(ci->type).push_front(bel); + } + + } } + } } } @@ -728,8 +746,8 @@ class HeAPPlacer for (auto &r : regions) { if (merged_regions.count(r.id)) continue; - /*log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, - r.bels);*/ + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels); workqueue.emplace(r.id, false); //cut_region(r, false); } @@ -865,7 +883,7 @@ class HeAPPlacer auto process_location = [&](int x, int y) { // Merge with any overlapping regions - if (groups.at(x).at(y) != r.id) { + if (groups.at(x).at(y) == -1) { r.bels += bels_at(x, y); r.cells += occ_at(x, y); } From 2a0c117662d26ce36ccda4d71b0d3617afc8bf80 Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 24 Jan 2019 13:36:23 +0000 Subject: [PATCH 31/59] HeAP: Add performance counters Signed-off-by: David Shah --- common/placer_heap.cc | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 7e8323ca..84e5c2f1 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -30,6 +30,7 @@ #include #include #include +#include #include "log.h" #include "nextpnr.h" #include "place_common.h" @@ -118,6 +119,8 @@ class HeAPPlacer HeAPPlacer(Context *ctx) : ctx(ctx) {} bool place() { + auto startt = std::chrono::high_resolution_clock::now(); + ctx->lock(); taucif_init_solver(); place_constraints(); @@ -158,6 +161,7 @@ class HeAPPlacer best_hpwl = std::numeric_limits::max(); valid = true; } + setup_solve_cells(); EquationSystem esx(solve_cells.size(), solve_cells.size()); @@ -170,10 +174,13 @@ class HeAPPlacer // 
log_info("y-axis\n"); solve_equations(esy, true); solved_hpwl = total_hpwl(); + log_info("Solved HPWL = %d\n", int(solved_hpwl)); update_all_chains(); CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); + CutLegaliser(this, ctx->id("ICESTORM_RAM")).run(); + update_all_chains(); legal_hpwl = total_hpwl(); log_info("Spread HPWL = %d\n", int(legal_hpwl)); @@ -192,7 +199,11 @@ class HeAPPlacer ++iter; } ctx->unlock(); - + auto endtt = std::chrono::high_resolution_clock::now(); + log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration(endtt - startt).count()); + log_info(" of which solving equations: %.02fs\n", solve_time); + log_info(" of which coarse legalisation: %.02fs\n", cl_time); + log_info(" of which strict legalisation: %.02fs\n", sl_time); placer1_refine(ctx, Placer1Cfg(ctx)); return true; @@ -237,6 +248,9 @@ class HeAPPlacer // The offset from chain_root to a cell in the chain std::unordered_map> cell_offsets; + // Performance counting + double solve_time = 0, cl_time = 0, sl_time = 0; + // Place cells with the BEL attribute set to constrain them void place_constraints() { @@ -530,7 +544,7 @@ class HeAPPlacer int o_pos = cell_pos(other->cell); // if (o_pos == this_pos) // return; // FIXME: or clamp to 1? 
- double weight = 1.0 / (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); + double weight = 1.0 / (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); // FIXME: add criticality to weighting // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, @@ -545,7 +559,7 @@ class HeAPPlacer }); } if (iter != -1) { - const float alpha = 0.1; + const float alpha = 0.05; float weight = alpha * iter; for (size_t row = 0; row < solve_cells.size(); row++) { // Add an arc from legalised to current position @@ -558,6 +572,7 @@ class HeAPPlacer // Build the system of equations for either X or Y void solve_equations(EquationSystem &es, bool yaxis) { + auto startt = std::chrono::high_resolution_clock::now(); // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; std::vector vals; @@ -571,6 +586,8 @@ class HeAPPlacer cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i); cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i)))); } + auto endt = std::chrono::high_resolution_clock::now(); + solve_time += std::chrono::duration(endt - startt).count(); } // Compute HPWL @@ -619,6 +636,8 @@ class HeAPPlacer // validity rules for control sets, etc, rather than a CLB/tile as in a more conventional pack&place flow) void legalise_placement_simple(bool require_validity = false) { + auto startt = std::chrono::high_resolution_clock::now(); + // Unbind all cells placed in this solution for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; @@ -701,6 +720,8 @@ class HeAPPlacer } } } + auto endt = std::chrono::high_resolution_clock::now(); + sl_time += std::chrono::duration(endt - startt).count(); } static constexpr float beta = 0.9; @@ -735,6 +756,7 @@ class HeAPPlacer static int seq; void run() { + auto startt = std::chrono::high_resolution_clock::now(); init(); 
find_overused_regions(); expand_regions(); @@ -772,6 +794,8 @@ class HeAPPlacer ++seq; } + auto endt = std::chrono::high_resolution_clock::now(); + p->cl_time += std::chrono::duration(endt - startt).count(); } private: From eb638c47b3b80830a9c349f01164b1054c68ce14 Mon Sep 17 00:00:00 2001 From: David Shah Date: Thu, 24 Jan 2019 14:05:16 +0000 Subject: [PATCH 32/59] HeAP: fine tuning Signed-off-by: David Shah --- common/placer_heap.cc | 128 +++++++++++++++++++++++++++++++++--------- 1 file changed, 100 insertions(+), 28 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 84e5c2f1..c4c9ffac 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -31,6 +31,7 @@ #include #include #include +#include #include "log.h" #include "nextpnr.h" #include "place_common.h" @@ -155,6 +156,9 @@ class HeAPPlacer bool valid = true; wirelen_t solved_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); int iter = 0, stalled = 0; + + std::vector> solution; + while (!valid || (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8))) { if (!valid && ((solved_hpwl > legal_hpwl * 0.8) || (stalled > 5))) { stalled = 0; @@ -162,18 +166,24 @@ class HeAPPlacer valid = true; } - setup_solve_cells(); + for (int i = 0; i < 5; i++) { + setup_solve_cells(); - EquationSystem esx(solve_cells.size(), solve_cells.size()); - build_equations(esx, false, iter); - // log_info("x-axis\n"); - solve_equations(esx, false); + EquationSystem esx(solve_cells.size(), solve_cells.size()); + build_equations(esx, false, (iter == 0) ? -1 : iter); + // log_info("x-axis\n"); + solve_equations(esx, false); - EquationSystem esy(solve_cells.size(), solve_cells.size()); - build_equations(esy, true, iter); - // log_info("y-axis\n"); - solve_equations(esy, true); - solved_hpwl = total_hpwl(); + EquationSystem esy(solve_cells.size(), solve_cells.size()); + build_equations(esy, true, (iter == 0) ? 
-1 : iter); + // log_info("y-axis\n"); + solve_equations(esy, true); + + update_all_chains(); + + solved_hpwl = total_hpwl(); + log_info("Initial placer iter %d, hpwl = %d\n", i, int(solved_hpwl)); + } log_info("Solved HPWL = %d\n", int(solved_hpwl)); @@ -192,12 +202,40 @@ class HeAPPlacer if (legal_hpwl < best_hpwl) { best_hpwl = legal_hpwl; stalled = 0; + + if (valid) { + // Save solution + solution.clear(); + for (auto cell : sorted(ctx->cells)) { + solution.emplace_back(cell.second, cell.second->bel, cell.second->belStrength); + } + } + } else { ++stalled; } + for (auto &cl : cell_locs) { + cl.second.legal_x = cl.second.x; + cl.second.legal_y = cl.second.y; + } ctx->yield(); ++iter; } + + // Apply saved solution + for (auto &sc : solution) { + CellInfo *cell = std::get<0>(sc); + if (cell->bel != BelId()) + ctx->unbindBel(cell->bel); + } + for (auto &sc : solution) { + CellInfo *cell; + BelId bel; + PlaceStrength strength; + std::tie(cell, bel, strength) = sc; + ctx->bindBel(bel, cell, strength); + } + ctx->unlock(); auto endtt = std::chrono::high_resolution_clock::now(); log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration(endtt - startt).count()); @@ -228,6 +266,7 @@ class HeAPPlacer struct CellLocation { int x, y; + int legal_x, legal_y; double rawx, rawy; bool locked, global; }; @@ -490,6 +529,7 @@ class HeAPPlacer { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; + auto legal_pos = [&](CellInfo *cell) { return yaxis ? 
cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x; }; es.reset(); @@ -559,12 +599,15 @@ class HeAPPlacer }); } if (iter != -1) { - const float alpha = 0.05; - float weight = alpha * iter; + const float alpha = 0.3; for (size_t row = 0; row < solve_cells.size(); row++) { + int l_pos = legal_pos(solve_cells.at(row)); + int c_pos = cell_pos(solve_cells.at(row)); + + double weight = alpha * iter / std::max(1, std::abs(l_pos - c_pos)); // Add an arc from legalised to current position es.add_coeff(row, row, weight); - es.add_rhs(row, weight * cell_pos(solve_cells.at(row))); + es.add_rhs(row, weight * l_pos); } } } @@ -677,7 +720,7 @@ class HeAPPlacer if (nx < 0 || nx > max_x) continue; - if (ny < 0 || ny > max_x) + if (ny < 0 || ny > max_y) continue; // ny = nearest_row_with_bel.at(bt).at(ny); @@ -692,7 +735,7 @@ class HeAPPlacer if (ci->constr_children.empty()) { for (auto sz : fb.at(nx).at(ny)) { - if (ctx->checkBelAvail(sz) || radius > (max_x / 4)) { + if (ctx->checkBelAvail(sz) || radius > 1) { CellInfo *bound = ctx->getBoundBelCell(sz); if (bound != nullptr) { if (bound->constr_parent != nullptr || !bound->constr_children.empty()) @@ -777,21 +820,41 @@ class HeAPPlacer auto front = workqueue.front(); workqueue.pop(); auto &r = regions.at(front.first); - //log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, - //r.bels); + if (r.cells == 0) + continue; + //log_info("%s (%d, %d) |_> (%d, %d) %d/%d %c\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, r.bels, front.second ? 'y' : 'x'); auto res = cut_region(r, front.second); if (res) { workqueue.emplace(res->first, !front.second); workqueue.emplace(res->second, !front.second); + } else { + // Try the other dir, in case stuck in one direction only + //log_info("RETRY %s (%d, %d) |_> (%d, %d) %d/%d %c\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, r.bels, front.second ? 
'x' : 'y'); + + auto res2 = cut_region(r, !front.second); + if (res2) { + //log_info("RETRY SUCCESS\n"); + workqueue.emplace(res2->first, front.second); + workqueue.emplace(res2->second, front.second); + } } + } if (ctx->debug) { std::ofstream sp("spread" + std::to_string(seq) + ".csv"); for (size_t i = 0; i < p->solve_cells.size(); i++) { auto &c = p->solve_cells.at(i); + if (c->type != beltype) + continue; sp << orig.at(i).first << "," << orig.at(i).second << "," << p->cell_locs[c->name].rawx << "," << p->cell_locs[c->name].rawy << std::endl; } - + std::ofstream oc("cells" + std::to_string(seq) + ".csv"); + for (size_t y = 0; y <= p->max_y; y++) { + for (size_t x = 0; x <= p->max_x; x++) { + oc << cells_at_location.at(x).at(y).size() << ", "; + } + oc << std::endl; + } ++seq; } auto endt = std::chrono::high_resolution_clock::now(); @@ -848,8 +911,10 @@ class HeAPPlacer }; for (auto &cell : p->cell_locs) { - if (ctx->cells.at(cell.first)->type == beltype) - occupancy.at(cell.second.x).at(cell.second.y)++; + if (ctx->cells.at(cell.first)->type != beltype) + continue; + + occupancy.at(cell.second.x).at(cell.second.y)++; // Compute ultimate extent of each chain root if (p->chain_root.count(cell.first)) { set_chain_ext(p->chain_root.at(cell.first)->name, cell.second.x, cell.second.y); @@ -858,6 +923,8 @@ class HeAPPlacer } } for (auto &cell : p->cell_locs) { + if (ctx->cells.at(cell.first)->type != beltype) + continue; // Transfer chain extents to the actual chaines structure ChainExtent *ce = nullptr; if (p->chain_root.count(cell.first)) @@ -873,6 +940,8 @@ class HeAPPlacer } } for (auto cell : p->solve_cells) { + if (cell->type != beltype) + continue; cells_at_location.at(p->cell_locs.at(cell->name).x) .at(p->cell_locs.at(cell->name).y) .push_back(cell); @@ -1049,25 +1118,28 @@ class HeAPPlacer { cut_cells.clear(); auto &cal = cells_at_location; - + int total_cells = 0, total_bels = 0; for (int x = r.x0; x <= r.x1; x++) { for (int y = r.y0; y <= r.y1; y++) { 
std::copy(cal.at(x).at(y).begin(), cal.at(x).at(y).end(), std::back_inserter(cut_cells)); + total_bels += bels_at(x, y); } } - + for (auto &cell : cut_cells) { + total_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; + } std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) { return dir ? (p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy) : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx); }); - if (cut_cells.empty()) + if (cut_cells.size() < 2) return {}; // Find the cells midpoint, counting chains in terms of their total size - making the initial source cut int pivot_cells = 0; int pivot = 0; for (auto &cell : cut_cells) { pivot_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; - if (pivot_cells >= r.cells / 2) + if (pivot_cells >= total_cells / 2) break; pivot++; } @@ -1123,8 +1195,8 @@ class HeAPPlacer return {}; // Now find the initial target cut that minimises utilisation imbalance, whilst // meeting the clearance requirements for any large macros - int left_cells = pivot_cells, right_cells = r.cells - pivot_cells; - int left_bels = 0, right_bels = r.bels; + int left_cells = pivot_cells, right_cells = total_cells - pivot_cells; + int left_bels = 0, right_bels = total_bels; int best_tgt_cut = -1; double best_deltaU = std::numeric_limits::max(); std::pair target_cut_bels; @@ -1184,8 +1256,8 @@ class HeAPPlacer bin_bounds.emplace_back(cells_start, area_l); for (int i = 1; i < K; i++) bin_bounds.emplace_back(cells_start + (N * i) / K, - area_l + ((area_r - area_l + 0.9) * i) / K); - bin_bounds.emplace_back(cells_end, area_r + 0.9); + area_l + ((area_r - area_l + 0.99) * i) / K); + bin_bounds.emplace_back(cells_end, area_r + 0.99); //log("bins "); //for (auto b : bin_bounds) log("%d, %.01f; ", b.first, b.second); //log("\n"); From 05e9ae183d7d971f1df30e0cc989d20ee6f309a9 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 25 Jan 2019 11:57:58 +0000 
Subject: [PATCH 33/59] HeAP: Add multithreading Signed-off-by: David Shah --- common/placer_heap.cc | 51 ++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index c4c9ffac..884c18e0 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -32,6 +32,7 @@ #include #include #include +#include #include "log.h" #include "nextpnr.h" #include "place_common.h" @@ -130,18 +131,15 @@ class HeAPPlacer update_all_chains(); wirelen_t hpwl = total_hpwl(); log_info("Initial placer starting hpwl = %d\n", int(hpwl)); - for (int i = 0; i < 20; i++) { + for (int i = 0; i < 4; i++) { setup_solve_cells(); - EquationSystem esx(solve_cells.size(), solve_cells.size()); - build_equations(esx, false); - // log_info("x-axis\n"); - solve_equations(esx, false); + std::thread xaxis([&](){build_solve_direction(false, -1);}); + std::thread yaxis([&](){build_solve_direction(true, -1);}); + + xaxis.join(); + yaxis.join(); - EquationSystem esy(solve_cells.size(), solve_cells.size()); - build_equations(esy, true); - // log_info("y-axis\n"); - solve_equations(esy, true); update_all_chains(); @@ -165,26 +163,16 @@ class HeAPPlacer best_hpwl = std::numeric_limits::max(); valid = true; } + setup_solve_cells(); - for (int i = 0; i < 5; i++) { - setup_solve_cells(); + std::thread xaxis([&](){build_solve_direction(false, (iter == 0) ? -1 : iter);}); + std::thread yaxis([&](){build_solve_direction(true, (iter == 0) ? -1 : iter);}); - EquationSystem esx(solve_cells.size(), solve_cells.size()); - build_equations(esx, false, (iter == 0) ? -1 : iter); - // log_info("x-axis\n"); - solve_equations(esx, false); - - EquationSystem esy(solve_cells.size(), solve_cells.size()); - build_equations(esy, true, (iter == 0) ? 
-1 : iter); - // log_info("y-axis\n"); - solve_equations(esy, true); - - update_all_chains(); - - solved_hpwl = total_hpwl(); - log_info("Initial placer iter %d, hpwl = %d\n", i, int(solved_hpwl)); - } + xaxis.join(); + yaxis.join(); + update_all_chains(); + solved_hpwl = total_hpwl(); log_info("Solved HPWL = %d\n", int(solved_hpwl)); update_all_chains(); @@ -397,6 +385,15 @@ class HeAPPlacer } } + // Build and solve in one direction + void build_solve_direction(bool yaxis, int iter) { + for (int i = 0; i < 5; i++) { + EquationSystem esx(solve_cells.size(), solve_cells.size()); + build_equations(esx, yaxis, iter); + solve_equations(esx, yaxis); + } + } + // Check if a cell has any meaningful connectivity bool has_connectivity(CellInfo *cell) { @@ -599,7 +596,7 @@ class HeAPPlacer }); } if (iter != -1) { - const float alpha = 0.3; + const float alpha = 0.2; for (size_t row = 0; row < solve_cells.size(); row++) { int l_pos = legal_pos(solve_cells.at(row)); int c_pos = cell_pos(solve_cells.at(row)); From 3dedc6259396fd4960a9aa43e05f9011323fb7cd Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 25 Jan 2019 13:15:36 +0000 Subject: [PATCH 34/59] HeAP: Weight arcs by criticality for timing-driven placement Signed-off-by: David Shah --- common/placer_heap.cc | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 884c18e0..73aeb9b4 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -39,6 +39,7 @@ #include "placer_math.h" #include "placer1.h" #include "util.h" +#include "timing.h" NEXTPNR_NAMESPACE_BEGIN namespace { @@ -187,6 +188,10 @@ class HeAPPlacer legal_hpwl = total_hpwl(); log_info("Legalised HPWL = %d (%s)\n", int(legal_hpwl), valid ? 
"valid" : "invalid"); + + if (ctx->timing_driven) + get_criticalities(ctx, &net_crit); + if (legal_hpwl < best_hpwl) { best_hpwl = legal_hpwl; stalled = 0; @@ -278,6 +283,8 @@ class HeAPPlacer // Performance counting double solve_time = 0, cl_time = 0, sl_time = 0; + NetCriticalityMap net_crit; + // Place cells with the BEL attribute set to constrain them void place_constraints() { @@ -516,9 +523,9 @@ class HeAPPlacer template void foreach_port(NetInfo *net, Tf func) { if (net->driver.cell != nullptr) - func(net->driver); - for (auto &user : net->users) - func(user); + func(net->driver, -1); + for (size_t i = 0; i < net->users.size(); i++) + func(net->users.at(i), i); } // Build the system of equations for either X or Y @@ -541,7 +548,7 @@ class HeAPPlacer // Find the bounds of the net in this axis, and the ports that correspond to these bounds PortRef *lbport = nullptr, *ubport = nullptr; int lbpos = std::numeric_limits::max(), ubpos = std::numeric_limits::min(); - foreach_port(ni, [&](PortRef &port) { + foreach_port(ni, [&](PortRef &port, int user_idx) { int pos = cell_pos(port.cell); if (pos < lbpos) { lbpos = pos; @@ -573,7 +580,7 @@ class HeAPPlacer }; // Add all relevant connections to the matrix - foreach_port(ni, [&](PortRef &port) { + foreach_port(ni, [&](PortRef &port, int user_idx) { int this_pos = cell_pos(port.cell); auto process_arc = [&](PortRef *other) { if (other == &port) @@ -582,7 +589,12 @@ class HeAPPlacer // if (o_pos == this_pos) // return; // FIXME: or clamp to 1? 
double weight = 1.0 / (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); - // FIXME: add criticality to weighting + + if (user_idx != -1 && net_crit.count(ni->name)) { + auto &nc = net_crit.at(ni->name); + if (user_idx < int(nc.criticality.size())) + weight *= (1.0 + 20 * std::pow(nc.criticality.at(user_idx), 2)); + } // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, // if the other end isn't fixed From ba1e05f16bd719d2e760e6860342a9a25324bb0d Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 25 Jan 2019 14:04:19 +0000 Subject: [PATCH 35/59] HeAP: Implement 'all+rotate' HeAP strategy Signed-off-by: David Shah --- common/placer1.cc | 2 +- common/placer_heap.cc | 80 ++++++++++++++++++++++++++++--------------- common/placer_math.c | 4 +-- 3 files changed, 56 insertions(+), 30 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index ffa3aa75..a4906985 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -259,7 +259,7 @@ class SAPlacer wirelen_t min_wirelen = curr_wirelen_cost; int n_no_progress = 0; - temp = refine ? 1e-8 : cfg.startTemp; + temp = refine ? 
1e-7 : cfg.startTemp; // Main simulated annealing loop for (int iter = 1;; iter++) { diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 73aeb9b4..106c42e9 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -134,10 +134,11 @@ class HeAPPlacer log_info("Initial placer starting hpwl = %d\n", int(hpwl)); for (int i = 0; i < 4; i++) { setup_solve_cells(); - + auto solve_startt = std::chrono::high_resolution_clock::now(); std::thread xaxis([&](){build_solve_direction(false, -1);}); std::thread yaxis([&](){build_solve_direction(true, -1);}); - + auto solve_endt = std::chrono::high_resolution_clock::now(); + solve_time += std::chrono::duration(solve_endt - solve_startt).count(); xaxis.join(); yaxis.join(); @@ -158,36 +159,59 @@ class HeAPPlacer std::vector> solution; + std::vector> heap_runs; + std::unordered_set all_celltypes; + + for (auto cell : place_cells) { + if (!all_celltypes.count(cell->type)) { + heap_runs.push_back(std::unordered_set{cell->type}); + all_celltypes.insert(cell->type); + } + } + heap_runs.push_back(all_celltypes); + while (!valid || (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8))) { if (!valid && ((solved_hpwl > legal_hpwl * 0.8) || (stalled > 5))) { stalled = 0; best_hpwl = std::numeric_limits::max(); valid = true; } - setup_solve_cells(); + for (auto &run : heap_runs) { + setup_solve_cells(&run); + if (solve_cells.empty()) + continue; + // Heuristic: don't bother with threading below a certain size + auto solve_startt = std::chrono::high_resolution_clock::now(); - std::thread xaxis([&](){build_solve_direction(false, (iter == 0) ? -1 : iter);}); - std::thread yaxis([&](){build_solve_direction(true, (iter == 0) ? -1 : iter);}); + if (solve_cells.size() < 500) { + build_solve_direction(false, (iter == 0) ? -1 : iter); + build_solve_direction(true, (iter == 0) ? -1 : iter); + } else { + std::thread xaxis([&](){build_solve_direction(false, (iter == 0) ? 
-1 : iter);}); + std::thread yaxis([&](){build_solve_direction(true, (iter == 0) ? -1 : iter);}); + xaxis.join(); + yaxis.join(); + } + auto solve_endt = std::chrono::high_resolution_clock::now(); + solve_time += std::chrono::duration(solve_endt - solve_startt).count(); + update_all_chains(); + solved_hpwl = total_hpwl(); + log_info("Solved HPWL = %d\n", int(solved_hpwl)); - xaxis.join(); - yaxis.join(); + update_all_chains(); + for (auto type : sorted(run)) + CutLegaliser(this, type).run(); - update_all_chains(); - solved_hpwl = total_hpwl(); - log_info("Solved HPWL = %d\n", int(solved_hpwl)); + update_all_chains(); + legal_hpwl = total_hpwl(); + log_info("Spread HPWL = %d\n", int(legal_hpwl)); + legalise_placement_simple(valid); + update_all_chains(); - update_all_chains(); - CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); - CutLegaliser(this, ctx->id("ICESTORM_RAM")).run(); + legal_hpwl = total_hpwl(); + log_info("Legalised HPWL = %d (%s)\n", int(legal_hpwl), valid ? "valid" : "invalid"); - update_all_chains(); - legal_hpwl = total_hpwl(); - log_info("Spread HPWL = %d\n", int(legal_hpwl)); - legalise_placement_simple(valid); - update_all_chains(); - - legal_hpwl = total_hpwl(); - log_info("Legalised HPWL = %d (%s)\n", int(legal_hpwl), valid ? "valid" : "invalid"); + } if (ctx->timing_driven) get_criticalities(ctx, &net_crit); @@ -624,7 +648,6 @@ class HeAPPlacer // Build the system of equations for either X or Y void solve_equations(EquationSystem &es, bool yaxis) { - auto startt = std::chrono::high_resolution_clock::now(); // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? 
cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; std::vector vals; @@ -638,8 +661,6 @@ class HeAPPlacer cell_locs.at(solve_cells.at(i)->name).rawx = vals.at(i); cell_locs.at(solve_cells.at(i)->name).x = std::min(max_x, std::max(0, int(vals.at(i)))); } - auto endt = std::chrono::high_resolution_clock::now(); - solve_time += std::chrono::duration(endt - startt).count(); } // Compute HPWL @@ -1111,9 +1132,14 @@ class HeAPPlacer if (!reg.overused()) break; } - if (!changed) - log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, reg.x1, - reg.y1, reg.cells, beltype.c_str(ctx)); + if (!changed) { + if (reg.cells > reg.bels) + log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, reg.x1, + reg.y1, reg.cells, beltype.c_str(ctx)); + else + break; + } + } } } diff --git a/common/placer_math.c b/common/placer_math.c index 53b6190d..1aa74a9d 100644 --- a/common/placer_math.c +++ b/common/placer_math.c @@ -4,7 +4,7 @@ #include void taucif_init_solver() { - taucs_logfile("stdout"); + //taucs_logfile("stdout"); } struct taucif_system { @@ -39,7 +39,7 @@ void taucif_finalise_matrix(struct taucif_system *sys) { int taucif_solve_system(struct taucif_system *sys, double *x, double *rhs) { // FIXME: preconditioner, droptol?? 
- taucs_ccs_matrix* precond_mat = taucs_ccs_factor_llt(sys->mat, 1e-3, 0); + taucs_ccs_matrix* precond_mat = taucs_ccs_factor_llt(sys->mat, 1e-2, 0); if (precond_mat == NULL) return -1; // FIXME: itermax, convergetol From 8295f997aee6d70b5633f62962f3ac65d3db72a5 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 25 Jan 2019 18:26:14 +0000 Subject: [PATCH 36/59] HeAP: Use for ECP5 as well as iCE40 Signed-off-by: David Shah --- common/placer_heap.cc | 29 ++++++++++++++++++++--------- ecp5/arch.cc | 9 ++------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 106c42e9..4b618265 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -136,12 +136,10 @@ class HeAPPlacer setup_solve_cells(); auto solve_startt = std::chrono::high_resolution_clock::now(); std::thread xaxis([&](){build_solve_direction(false, -1);}); - std::thread yaxis([&](){build_solve_direction(true, -1);}); + build_solve_direction(true, -1); + xaxis.join(); auto solve_endt = std::chrono::high_resolution_clock::now(); solve_time += std::chrono::duration(solve_endt - solve_startt).count(); - xaxis.join(); - yaxis.join(); - update_all_chains(); @@ -161,13 +159,23 @@ class HeAPPlacer std::vector> heap_runs; std::unordered_set all_celltypes; + std::unordered_map ct_count; for (auto cell : place_cells) { if (!all_celltypes.count(cell->type)) { heap_runs.push_back(std::unordered_set{cell->type}); all_celltypes.insert(cell->type); } + ct_count[cell->type]++; } + // If more than 98% of cells are one cell type, always solve all at once + // Otherwise, follow full HeAP strategy of rotate&all + for (auto &c : ct_count) + if (c.second >= 0.98 * int(place_cells.size())) { + heap_runs.clear(); + break; + } + heap_runs.push_back(all_celltypes); while (!valid || (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8))) { @@ -177,6 +185,8 @@ class HeAPPlacer valid = true; } for (auto &run : heap_runs) { + auto run_startt = 
std::chrono::high_resolution_clock::now(); + setup_solve_cells(&run); if (solve_cells.empty()) continue; @@ -188,9 +198,8 @@ class HeAPPlacer build_solve_direction(true, (iter == 0) ? -1 : iter); } else { std::thread xaxis([&](){build_solve_direction(false, (iter == 0) ? -1 : iter);}); - std::thread yaxis([&](){build_solve_direction(true, (iter == 0) ? -1 : iter);}); + build_solve_direction(true, (iter == 0) ? -1 : iter); xaxis.join(); - yaxis.join(); } auto solve_endt = std::chrono::high_resolution_clock::now(); solve_time += std::chrono::duration(solve_endt - solve_startt).count(); @@ -210,6 +219,8 @@ class HeAPPlacer legal_hpwl = total_hpwl(); log_info("Legalised HPWL = %d (%s)\n", int(legal_hpwl), valid ? "valid" : "invalid"); + auto run_stopt = std::chrono::high_resolution_clock::now(); + log_info(" %s runtime: %.02fs\n",(run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), std::chrono::duration(run_stopt - run_startt).count()); } @@ -473,7 +484,7 @@ class HeAPPlacer cell_locs[cell.first].locked = false; cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); // FIXME - if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO")) { + if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO")&& cell.second->type != ctx->id("TRELLIS_IO")) { place_cells.push_back(ci); placed = true; } else { @@ -617,7 +628,7 @@ class HeAPPlacer if (user_idx != -1 && net_crit.count(ni->name)) { auto &nc = net_crit.at(ni->name); if (user_idx < int(nc.criticality.size())) - weight *= (1.0 + 20 * std::pow(nc.criticality.at(user_idx), 2)); + weight *= (1.0 + 10 * std::pow(nc.criticality.at(user_idx), 2)); } // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, @@ -632,7 +643,7 @@ class HeAPPlacer }); } if (iter != -1) { - const float alpha = 0.2; + const float alpha = 0.1; for (size_t row = 0; row < solve_cells.size(); row++) { int l_pos = legal_pos(solve_cells.at(row)); int c_pos = cell_pos(solve_cells.at(row)); 
diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 8ba8d28e..17dac59a 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -23,6 +23,7 @@ #include #include #include +#include "placer_heap.h" #include "gfx.h" #include "globals.h" #include "log.h" @@ -504,13 +505,7 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // ----------------------------------------------------------------------- -bool Arch::place() -{ - bool result = placer1(getCtx(), Placer1Cfg(getCtx())); - if (result) - permute_luts(); - return result; -} +bool Arch::place() { return placer_heap(getCtx()); } bool Arch::route() { From fb02fc69c6cefba2297656df8ee3cb01a2efe910 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 25 Jan 2019 19:24:54 +0000 Subject: [PATCH 37/59] HeAP: Make strict legalisation wirelength driven where needed Signed-off-by: David Shah --- common/placer_heap.cc | 68 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 6 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 4b618265..aa75752d 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -749,16 +749,23 @@ class HeAPPlacer auto &fb = fast_bels.at(bt); int radius = 0; int iter = 0; + int iter_at_radius = 0; bool placed = false; + BelId bestBel; + int best_inp_len = std::numeric_limits::max(); + while (!placed) { int nx = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); int ny = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).y - radius, 0); iter++; - if ((iter % (20 * (radius + 1))) == 0) + iter_at_radius++; + if (iter >= (10 * (radius + 1))) { radius = std::min(std::max(max_x, max_y), radius + 1); - + iter_at_radius = 0; + iter = 0; + } if (nx < 0 || nx > max_x) continue; if (ny < 0 || ny > max_y) @@ -774,22 +781,61 @@ class HeAPPlacer if (fb.at(nx).at(ny).empty()) continue; + int need_to_explore = 2 * radius; + + if (iter_at_radius >= need_to_explore && bestBel != BelId()) { + CellInfo *bound = 
ctx->getBoundBelCell(bestBel); + if (bound != nullptr) { + ctx->unbindBel(bound->bel); + remaining.emplace(chain_size[bound->name], bound->name); + } + ctx->bindBel(bestBel, ci, STRENGTH_WEAK); + placed = true; + Loc loc = ctx->getBelLocation(bestBel); + cell_locs[ci->name].x = loc.x; + cell_locs[ci->name].y = loc.y; + break; + } + if (ci->constr_children.empty()) { for (auto sz : fb.at(nx).at(ny)) { - if (ctx->checkBelAvail(sz) || radius > 1) { + if (ctx->checkBelAvail(sz) || radius > 2) { CellInfo *bound = ctx->getBoundBelCell(sz); if (bound != nullptr) { if (bound->constr_parent != nullptr || !bound->constr_children.empty()) continue; ctx->unbindBel(bound->bel); - remaining.emplace(chain_size[bound->name], bound->name); } ctx->bindBel(sz, ci, STRENGTH_WEAK); if (require_validity && !ctx->isBelLocationValid(sz)) { ctx->unbindBel(sz); if (bound != nullptr) ctx->bindBel(sz, bound, STRENGTH_WEAK); + } else if (iter_at_radius < need_to_explore) { + ctx->unbindBel(sz); + if (bound != nullptr) + ctx->bindBel(sz, bound, STRENGTH_WEAK); + int input_len = 0; + for (auto &port : ci->ports) { + auto &p = port.second; + if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr) + continue; + CellInfo *drv = p.net->driver.cell; + auto drv_loc = cell_locs.find(drv->name); + if (drv_loc == cell_locs.end()) + continue; + if (drv_loc->second.global) + continue; + input_len += std::abs(drv_loc->second.x - nx) + std::abs(drv_loc->second.y - ny); + } + if (input_len < best_inp_len) { + best_inp_len = input_len; + bestBel = sz; + } + break; } else { + if (bound != nullptr) + remaining.emplace(chain_size[bound->name], bound->name); Loc loc = ctx->getBelLocation(sz); cell_locs[ci->name].x = loc.x; cell_locs[ci->name].y = loc.y; @@ -802,6 +848,7 @@ class HeAPPlacer // FIXME NPNR_ASSERT(false); } + } } auto endt = std::chrono::high_resolution_clock::now(); @@ -843,6 +890,13 @@ class HeAPPlacer auto startt = std::chrono::high_resolution_clock::now(); init(); 
find_overused_regions(); + for (auto &r : regions) { + if (merged_regions.count(r.id)) + continue; + log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, + r.bels); + } + log_break(); expand_regions(); std::queue> workqueue; std::vector> orig; @@ -1298,7 +1352,7 @@ class HeAPPlacer } // Split region into up to 10 (K) bins int K = std::min(N, 10); - std::vector> bin_bounds; // [start, end] + std::vector> bin_bounds; // [(cell start, area start)] bin_bounds.emplace_back(cells_start, area_l); for (int i = 1; i < K; i++) bin_bounds.emplace_back(cells_start + (N * i) / K, @@ -1313,12 +1367,14 @@ class HeAPPlacer : p->cell_locs.at(cut_cells.at(bl.first)->name).rawx; double orig_right = dir ? p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawy : p->cell_locs.at(cut_cells.at(br.first - 1)->name).rawx; - double m = (br.second - bl.second) / (1 + orig_right - orig_left); + double m = (br.second - bl.second) / std::max(0.00001, orig_right - orig_left); for (int j = bl.first; j < br.first; j++) { auto &pos = dir ? 
p->cell_locs.at(cut_cells.at(j)->name).rawy : p->cell_locs.at(cut_cells.at(j)->name).rawx; + double orig_pos = pos; NPNR_ASSERT(pos >= orig_left && pos <= orig_right); pos = bl.second + m * (pos - orig_left); + //log("[%f, %f] -> [%f, %f]: %f -> %f\n", orig_left, orig_right, bl.second, br.second, orig_pos, pos); } } }; From 2e2f44c82efeb327d492f55fb5b92103d65f3d61 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 26 Jan 2019 13:22:44 +0000 Subject: [PATCH 38/59] HeAP: tidying up Signed-off-by: David Shah --- common/placer1.cc | 13 +-- common/placer_heap.cc | 187 ++++++++++++++++++------------------------ ecp5/arch.cc | 2 +- 3 files changed, 86 insertions(+), 116 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index a4906985..368d9dde 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -183,8 +183,9 @@ class SAPlacer auto bound_cell = ctx->getBoundBelCell(bel); if (bound_cell) { - log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", - cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); + log_error( + "Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n", + cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx)); } ctx->bindBel(bel, cell, STRENGTH_USER); @@ -198,7 +199,6 @@ class SAPlacer // Sort to-place cells for deterministic initial placement - for (auto &cell : ctx->cells) { CellInfo *ci = cell.second.get(); if (ci->bel == BelId()) { @@ -225,7 +225,8 @@ class SAPlacer assign_budget(ctx); ctx->yield(); auto iplace_end = std::chrono::high_resolution_clock::now(); - log_info("Initial placement time %.02fs\n", std::chrono::duration(iplace_end - iplace_start).count()); + log_info("Initial placement time %.02fs\n", + std::chrono::duration(iplace_end - iplace_start).count()); log_info("Running simulated annealing placer.\n"); } else { for (auto &cell : ctx->cells) { @@ -951,7 +952,8 @@ bool placer1(Context *ctx, Placer1Cfg cfg) } } 
-bool placer1_refine(Context *ctx, Placer1Cfg cfg) { +bool placer1_refine(Context *ctx, Placer1Cfg cfg) +{ try { SAPlacer placer(ctx, cfg); placer.place(true); @@ -970,5 +972,4 @@ bool placer1_refine(Context *ctx, Placer1Cfg cfg) { } } - NEXTPNR_NAMESPACE_END diff --git a/common/placer_heap.cc b/common/placer_heap.cc index aa75752d..08e65f9b 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -22,24 +22,31 @@ * [[cite]] SimPL * SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov * http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf + * + * Notable changes from the original algorithm + * - Following the other nextpnr placer, Bels are placed rather than CLBs. This means a strict legalisation pass is + * added in addition to coarse legalisation (referred to as "spreading" to avoid confusion with strict legalisation) + * as described in HeAP to ensure validity. This searches random bels in the vicinity of the position chosen by + * spreading, with diameter increasing over iterations, with a heuristic to prefer lower wirelength choices. 
+ * - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2) */ +#include +#include #include +#include #include #include -#include -#include -#include -#include -#include #include +#include +#include #include "log.h" #include "nextpnr.h" #include "place_common.h" -#include "placer_math.h" #include "placer1.h" -#include "util.h" +#include "placer_math.h" #include "timing.h" +#include "util.h" NEXTPNR_NAMESPACE_BEGIN namespace { @@ -135,7 +142,7 @@ class HeAPPlacer for (int i = 0; i < 4; i++) { setup_solve_cells(); auto solve_startt = std::chrono::high_resolution_clock::now(); - std::thread xaxis([&](){build_solve_direction(false, -1);}); + std::thread xaxis([&]() { build_solve_direction(false, -1); }); build_solve_direction(true, -1); xaxis.join(); auto solve_endt = std::chrono::high_resolution_clock::now(); @@ -147,15 +154,10 @@ class HeAPPlacer log_info("Initial placer iter %d, hpwl = %d\n", i, int(hpwl)); } - // legalise_with_cuts(true); - // CutLegaliser(this, ctx->id("ICESTORM_LC")).run(); - //NPNR_ASSERT(false); - - bool valid = true; wirelen_t solved_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); int iter = 0, stalled = 0; - std::vector> solution; + std::vector> solution; std::vector> heap_runs; std::unordered_set all_celltypes; @@ -177,13 +179,9 @@ class HeAPPlacer } heap_runs.push_back(all_celltypes); - - while (!valid || (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8))) { - if (!valid && ((solved_hpwl > legal_hpwl * 0.8) || (stalled > 5))) { - stalled = 0; - best_hpwl = std::numeric_limits::max(); - valid = true; - } + // The main HeAP placer loop + while (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8)) { + // Alternate between particular Bel types and all bels for (auto &run : heap_runs) { auto run_startt = std::chrono::high_resolution_clock::now(); @@ -197,7 +195,7 @@ class HeAPPlacer build_solve_direction(false, (iter == 0) ? -1 : iter); build_solve_direction(true, (iter == 0) ? 
-1 : iter); } else { - std::thread xaxis([&](){build_solve_direction(false, (iter == 0) ? -1 : iter);}); + std::thread xaxis([&]() { build_solve_direction(false, (iter == 0) ? -1 : iter); }); build_solve_direction(true, (iter == 0) ? -1 : iter); xaxis.join(); } @@ -209,19 +207,19 @@ class HeAPPlacer update_all_chains(); for (auto type : sorted(run)) - CutLegaliser(this, type).run(); + CutSpreader(this, type).run(); update_all_chains(); legal_hpwl = total_hpwl(); log_info("Spread HPWL = %d\n", int(legal_hpwl)); - legalise_placement_simple(valid); + legalise_placement_strict(true); update_all_chains(); legal_hpwl = total_hpwl(); - log_info("Legalised HPWL = %d (%s)\n", int(legal_hpwl), valid ? "valid" : "invalid"); + log_info("Legalised HPWL = %d\n", int(legal_hpwl)); auto run_stopt = std::chrono::high_resolution_clock::now(); - log_info(" %s runtime: %.02fs\n",(run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), std::chrono::duration(run_stopt - run_startt).count()); - + log_info(" %s runtime: %.02fs\n", (run.size() > 1 ? 
"ALL" : run.begin()->c_str(ctx)), + std::chrono::duration(run_stopt - run_startt).count()); } if (ctx->timing_driven) @@ -230,15 +228,11 @@ class HeAPPlacer if (legal_hpwl < best_hpwl) { best_hpwl = legal_hpwl; stalled = 0; - - if (valid) { - // Save solution - solution.clear(); - for (auto cell : sorted(ctx->cells)) { - solution.emplace_back(cell.second, cell.second->bel, cell.second->belStrength); - } + // Save solution + solution.clear(); + for (auto cell : sorted(ctx->cells)) { + solution.emplace_back(cell.second, cell.second->bel, cell.second->belStrength); } - } else { ++stalled; } @@ -268,7 +262,7 @@ class HeAPPlacer auto endtt = std::chrono::high_resolution_clock::now(); log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration(endtt - startt).count()); log_info(" of which solving equations: %.02fs\n", solve_time); - log_info(" of which coarse legalisation: %.02fs\n", cl_time); + log_info(" of which spreading cells: %.02fs\n", cl_time); log_info(" of which strict legalisation: %.02fs\n", sl_time); placer1_refine(ctx, Placer1Cfg(ctx)); @@ -359,7 +353,6 @@ class HeAPPlacer placed_cells++; } } - int constr_placed_cells = placed_cells; log_info("Placed %d cells based on constraints.\n", int(placed_cells)); ctx->yield(); } @@ -428,7 +421,8 @@ class HeAPPlacer } // Build and solve in one direction - void build_solve_direction(bool yaxis, int iter) { + void build_solve_direction(bool yaxis, int iter) + { for (int i = 0; i < 5; i++) { EquationSystem esx(solve_cells.size(), solve_cells.size()); build_equations(esx, yaxis, iter); @@ -458,9 +452,7 @@ class HeAPPlacer available_bels[ctx->getBelType(bel)].push_back(bel); } for (auto &t : available_bels) { - std::random_shuffle(t.second.begin(), t.second.end(), [&](size_t n){ - return ctx->rng(int(n)); - }); + std::random_shuffle(t.second.begin(), t.second.end(), [&](size_t n) { return ctx->rng(int(n)); }); } for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; @@ -484,7 +476,8 @@ class HeAPPlacer 
cell_locs[cell.first].locked = false; cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); // FIXME - if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO")&& cell.second->type != ctx->id("TRELLIS_IO")) { + if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO") && + cell.second->type != ctx->id("TRELLIS_IO")) { place_cells.push_back(ci); placed = true; } else { @@ -495,10 +488,8 @@ class HeAPPlacer } else { available_bels.at(ci->type).push_front(bel); } - } } - } } } @@ -568,7 +559,9 @@ class HeAPPlacer { // Return the x or y position of a cell, depending on ydir auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; }; - auto legal_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x; }; + auto legal_pos = [&](CellInfo *cell) { + return yaxis ? cell_locs.at(cell->name).legal_y : cell_locs.at(cell->name).legal_x; + }; es.reset(); @@ -621,8 +614,6 @@ class HeAPPlacer if (other == &port) return; int o_pos = cell_pos(other->cell); - // if (o_pos == this_pos) - // return; // FIXME: or clamp to 1? 
double weight = 1.0 / (ni->users.size() * std::max(1, std::abs(o_pos - this_pos))); if (user_idx != -1 && net_crit.count(ni->name)) { @@ -698,27 +689,8 @@ class HeAPPlacer return hpwl; } - // Swap the Bel of a cell with another, return the original location - BelId swap_cell_bels(CellInfo *cell, BelId newBel) - { - BelId oldBel = cell->bel; - CellInfo *bound = ctx->getBoundBelCell(newBel); - if (bound != nullptr) - ctx->unbindBel(newBel); - ctx->unbindBel(oldBel); - ctx->bindBel(newBel, cell, STRENGTH_WEAK); - if (bound != nullptr) - ctx->bindBel(oldBel, bound, STRENGTH_WEAK); - return oldBel; - } - - // Placement legalisation - // Note that there are *two meanings* of legalisation in nextpnr placement - // The first kind, as in HeAP, simply ensures that there is no overlap (each Bel maps only to one cell) - // The second kind also ensures that validity rules (isValidBelForCell) are met, because there is no guarantee - // in nextpnr that Bels are freely swappable (indeed many a architectures Bel is a logic cell with complex - // validity rules for control sets, etc, rather than a CLB/tile as in a more conventional pack&place flow) - void legalise_placement_simple(bool require_validity = false) + // Strict placement legalisation, performed after the initial HeAP spreading + void legalise_placement_strict(bool require_validity = false) { auto startt = std::chrono::high_resolution_clock::now(); @@ -848,13 +820,12 @@ class HeAPPlacer // FIXME NPNR_ASSERT(false); } - } } auto endt = std::chrono::high_resolution_clock::now(); sl_time += std::chrono::duration(endt - startt).count(); } - + // Implementation of the cut-based spreading as described in the HeAP/SimPL papers static constexpr float beta = 0.9; struct ChainExtent @@ -862,12 +833,11 @@ class HeAPPlacer int x0, y0, x1, y1; }; - struct LegaliserRegion + struct SpreaderRegion { int id; int x0, y0, x1, y1; int cells, bels; - std::unordered_set included_chains; bool overused() const { if (bels < 4) @@ -877,10 +847,10 
@@ class HeAPPlacer } }; - class CutLegaliser + class CutSpreader { public: - CutLegaliser(HeAPPlacer *p, IdString beltype) + CutSpreader(HeAPPlacer *p, IdString beltype) : p(p), ctx(p->ctx), beltype(beltype), fb(p->fast_bels.at(std::get<0>(p->bel_types.at(beltype)))) { } @@ -893,23 +863,28 @@ class HeAPPlacer for (auto &r : regions) { if (merged_regions.count(r.id)) continue; +#if 0 log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, r.bels); +#endif } - log_break(); expand_regions(); std::queue> workqueue; +#if 0 std::vector> orig; if (ctx->debug) for (auto c : p->solve_cells) orig.emplace_back(p->cell_locs[c->name].rawx, p->cell_locs[c->name].rawy); +#endif for (auto &r : regions) { if (merged_regions.count(r.id)) continue; +#if 0 log_info("%s (%d, %d) |_> (%d, %d) %d/%d\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, r.bels); +#endif workqueue.emplace(r.id, false); - //cut_region(r, false); + // cut_region(r, false); } while (!workqueue.empty()) { auto front = workqueue.front(); @@ -917,24 +892,21 @@ class HeAPPlacer auto &r = regions.at(front.first); if (r.cells == 0) continue; - //log_info("%s (%d, %d) |_> (%d, %d) %d/%d %c\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, r.bels, front.second ? 'y' : 'x'); auto res = cut_region(r, front.second); if (res) { workqueue.emplace(res->first, !front.second); workqueue.emplace(res->second, !front.second); } else { // Try the other dir, in case stuck in one direction only - //log_info("RETRY %s (%d, %d) |_> (%d, %d) %d/%d %c\n", beltype.c_str(ctx), r.x0, r.y0, r.x1, r.y1, r.cells, r.bels, front.second ? 
'x' : 'y'); - auto res2 = cut_region(r, !front.second); if (res2) { - //log_info("RETRY SUCCESS\n"); + // log_info("RETRY SUCCESS\n"); workqueue.emplace(res2->first, front.second); workqueue.emplace(res2->second, front.second); } } - } +#if 0 if (ctx->debug) { std::ofstream sp("spread" + std::to_string(seq) + ".csv"); for (size_t i = 0; i < p->solve_cells.size(); i++) { @@ -952,6 +924,7 @@ class HeAPPlacer } ++seq; } +#endif auto endt = std::chrono::high_resolution_clock::now(); p->cl_time += std::chrono::duration(endt - startt).count(); } @@ -967,7 +940,7 @@ class HeAPPlacer std::vector>> &fb; - std::vector regions; + std::vector regions; std::unordered_set merged_regions; // Cells at a location, sorted by real (not integer) x and y std::vector>> cells_at_location; @@ -1037,13 +1010,10 @@ class HeAPPlacer for (auto cell : p->solve_cells) { if (cell->type != beltype) continue; - cells_at_location.at(p->cell_locs.at(cell->name).x) - .at(p->cell_locs.at(cell->name).y) - .push_back(cell); + cells_at_location.at(p->cell_locs.at(cell->name).x).at(p->cell_locs.at(cell->name).y).push_back(cell); } - } - void merge_regions(LegaliserRegion &merged, LegaliserRegion &mergee) + void merge_regions(SpreaderRegion &merged, SpreaderRegion &mergee) { // Prevent grow_region from recursing while doing this for (int x = mergee.x0; x <= mergee.x1; x++) @@ -1058,7 +1028,7 @@ class HeAPPlacer grow_region(merged, mergee.x0, mergee.y0, mergee.x1, mergee.y1); } - void grow_region(LegaliserRegion &r, int x0, int y0, int x1, int y1, bool init = false) + void grow_region(SpreaderRegion &r, int x0, int y0, int x1, int y1, bool init = false) { // log_info("growing to (%d, %d) |_> (%d, %d)\n", x0, y0, x1, y1); if ((x0 >= r.x0 && y0 >= r.y0 && x1 <= r.x1 && y1 <= r.y1) || init) @@ -1106,7 +1076,7 @@ class HeAPPlacer // log_info("%d %d %d\n", x, y, occ_at(x, y)); int id = int(regions.size()); groups.at(x).at(y) = id; - LegaliserRegion reg; + SpreaderRegion reg; reg.id = id; reg.x0 = reg.x1 = x; 
reg.y0 = reg.y1 = y; @@ -1199,12 +1169,11 @@ class HeAPPlacer } if (!changed) { if (reg.cells > reg.bels) - log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, reg.x1, - reg.y1, reg.cells, beltype.c_str(ctx)); + log_error("Failed to expand region (%d, %d) |_> (%d, %d) of %d %ss\n", reg.x0, reg.y0, + reg.x1, reg.y1, reg.cells, beltype.c_str(ctx)); else break; } - } } } @@ -1214,7 +1183,7 @@ class HeAPPlacer std::vector cut_cells; - boost::optional> cut_region(LegaliserRegion &r, bool dir) + boost::optional> cut_region(SpreaderRegion &r, bool dir) { cut_cells.clear(); auto &cal = cells_at_location; @@ -1229,7 +1198,8 @@ class HeAPPlacer total_cells += p->chain_size.count(cell->name) ? p->chain_size.at(cell->name) : 1; } std::sort(cut_cells.begin(), cut_cells.end(), [&](const CellInfo *a, const CellInfo *b) { - return dir ? (p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy) : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx); + return dir ? 
(p->cell_locs.at(a->name).rawy < p->cell_locs.at(b->name).rawy) + : (p->cell_locs.at(a->name).rawx < p->cell_locs.at(b->name).rawx); }); if (cut_cells.size() < 2) @@ -1245,7 +1215,7 @@ class HeAPPlacer } if (pivot == int(cut_cells.size())) pivot = int(cut_cells.size()) - 1; - //log_info("orig pivot %d lc %d rc %d\n", pivot, pivot_cells, r.cells - pivot_cells); + // log_info("orig pivot %d lc %d rc %d\n", pivot, pivot_cells, r.cells - pivot_cells); // Find the clearance required either side of the pivot int clearance_l = 0, clearance_r = 0; @@ -1290,7 +1260,7 @@ class HeAPPlacer break; trimmed_r--; } - //log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); + // log_info("tl %d tr %d cl %d cr %d\n", trimmed_l, trimmed_r, clearance_l, clearance_r); if ((trimmed_r - trimmed_l + 1) <= std::max(clearance_l, clearance_r)) return {}; // Now find the initial target cut that minimises utilisation imbalance, whilst @@ -1321,7 +1291,8 @@ class HeAPPlacer NPNR_ASSERT(best_tgt_cut != -1); left_bels = target_cut_bels.first; right_bels = target_cut_bels.second; - //log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, right_cells, right_bels); + // log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, + // right_cells, right_bels); // Peturb the source cut to eliminate overutilisation while (pivot > 0 && (double(left_cells) / double(left_bels) > double(right_cells) / double(right_bels))) { @@ -1331,14 +1302,16 @@ class HeAPPlacer right_cells += size; pivot--; } - while (pivot < int(cut_cells.size()) - 1 && (double(left_cells) / double(left_bels) < double(right_cells) / double(right_bels))) { + while (pivot < int(cut_cells.size()) - 1 && + (double(left_cells) / double(left_bels) < double(right_cells) / double(right_bels))) { auto &move_cell = cut_cells.at(pivot + 1); int size = p->chain_size.count(move_cell->name) ? 
p->chain_size.at(move_cell->name) : 1; left_cells += size; right_cells -= size; pivot++; } - //log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells, right_bels); + // log_info("peturbed pivot %d lc %d lb %d rc %d rb %d\n", pivot, left_cells, left_bels, right_cells, + // right_bels); // Split regions into bins, and then spread cells by linear interpolation within those bins auto spread_binlerp = [&](int cells_start, int cells_end, double area_l, double area_r) { int N = cells_end - cells_start; @@ -1355,12 +1328,8 @@ class HeAPPlacer std::vector> bin_bounds; // [(cell start, area start)] bin_bounds.emplace_back(cells_start, area_l); for (int i = 1; i < K; i++) - bin_bounds.emplace_back(cells_start + (N * i) / K, - area_l + ((area_r - area_l + 0.99) * i) / K); + bin_bounds.emplace_back(cells_start + (N * i) / K, area_l + ((area_r - area_l + 0.99) * i) / K); bin_bounds.emplace_back(cells_end, area_r + 0.99); - //log("bins "); - //for (auto b : bin_bounds) log("%d, %.01f; ", b.first, b.second); - //log("\n"); for (int i = 0; i < K; i++) { auto &bl = bin_bounds.at(i), br = bin_bounds.at(i + 1); double orig_left = dir ? p->cell_locs.at(cut_cells.at(bl.first)->name).rawy @@ -1371,10 +1340,10 @@ class HeAPPlacer for (int j = bl.first; j < br.first; j++) { auto &pos = dir ? 
p->cell_locs.at(cut_cells.at(j)->name).rawy : p->cell_locs.at(cut_cells.at(j)->name).rawx; - double orig_pos = pos; NPNR_ASSERT(pos >= orig_left && pos <= orig_right); pos = bl.second + m * (pos - orig_left); - //log("[%f, %f] -> [%f, %f]: %f -> %f\n", orig_left, orig_right, bl.second, br.second, orig_pos, pos); + // log("[%f, %f] -> [%f, %f]: %f -> %f\n", orig_left, orig_right, bl.second, br.second, + // orig_pos, pos); } } }; @@ -1390,9 +1359,9 @@ class HeAPPlacer cl.x = std::min(r.x1, std::max(r.x0, int(cl.rawx))); cl.y = std::min(r.y1, std::max(r.y0, int(cl.rawy))); cells_at_location.at(cl.x).at(cl.y).push_back(cell); - //log_info("spread pos %d %d\n", cl.x, cl.y); + // log_info("spread pos %d %d\n", cl.x, cl.y); } - LegaliserRegion rl, rr; + SpreaderRegion rl, rr; rl.id = int(regions.size()); rl.x0 = r.x0; rl.y0 = r.y0; @@ -1421,7 +1390,7 @@ class HeAPPlacer typedef decltype(CellInfo::udata) cell_udata_t; cell_udata_t dont_solve = std::numeric_limits::max(); }; -int HeAPPlacer::CutLegaliser::seq = 0; +int HeAPPlacer::CutSpreader::seq = 0; bool placer_heap(Context *ctx) { return HeAPPlacer(ctx).place(); } diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 17dac59a..5ea6a7c3 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -23,12 +23,12 @@ #include #include #include -#include "placer_heap.h" #include "gfx.h" #include "globals.h" #include "log.h" #include "nextpnr.h" #include "placer1.h" +#include "placer_heap.h" #include "router1.h" #include "timing.h" #include "util.h" From 87edf6305f1e5e2553e239346877a9aa030d3afe Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 26 Jan 2019 13:53:14 +0000 Subject: [PATCH 39/59] ci: Add OpenBLAS to Dockerfile Signed-off-by: David Shah --- .cirrus/Dockerfile.ubuntu16.04 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus/Dockerfile.ubuntu16.04 b/.cirrus/Dockerfile.ubuntu16.04 index 1b93cfb8..7de6441e 100644 --- a/.cirrus/Dockerfile.ubuntu16.04 +++ b/.cirrus/Dockerfile.ubuntu16.04 @@ -8,7 +8,7 @@ RUN set -e -x 
;\ apt-get -y install \ build-essential autoconf cmake clang bison wget flex gperf \ libreadline-dev gawk tcl-dev libffi-dev graphviz xdot python3-dev \ - libboost-all-dev qt5-default git libftdi-dev pkg-config + libboost-all-dev qt5-default git libftdi-dev pkg-config libopenblas-dev RUN set -e -x ;\ mkdir -p /usr/local/src ;\ From 70a6379bf65352a62053a70ccfaa6c73898103ed Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 30 Jan 2019 16:36:01 +0000 Subject: [PATCH 40/59] HeAP: Chain support Signed-off-by: David Shah --- common/placer1.cc | 12 +++++- common/placer_heap.cc | 99 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 8 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 368d9dde..9b4b066e 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -475,9 +475,11 @@ class SAPlacer { static const double epsilon = 1e-20; moveChange.reset(); + if (is_constrained(cell)) + return false; BelId oldBel = cell->bel; CellInfo *other_cell = ctx->getBoundBelCell(newBel); - if (other_cell != nullptr && other_cell->belStrength > STRENGTH_WEAK) { + if (other_cell != nullptr && (is_constrained(other_cell) || other_cell->belStrength > STRENGTH_WEAK)) { return false; } int old_dist = get_constraints_distance(ctx, cell); @@ -529,6 +531,11 @@ class SAPlacer goto swap_fail; } commit_cost_changes(moveChange); +#if 0 + log_info("swap %s -> %s\n", cell->name.c_str(ctx), ctx->getBelName(newBel).c_str(ctx)); + if (other_cell != nullptr) + log_info("swap %s -> %s\n", other_cell->name.c_str(ctx), ctx->getBelName(oldBel).c_str(ctx)); +#endif return true; swap_fail: ctx->bindBel(oldBel, cell, STRENGTH_WEAK); @@ -547,6 +554,9 @@ class SAPlacer BelId swap_cell_bels(CellInfo *cell, BelId newBel) { BelId oldBel = cell->bel; +#if 0 + log_info("%s old: %s new: %s\n", cell->name.c_str(ctx), ctx->getBelName(cell->bel).c_str(ctx), ctx->getBelName(newBel).c_str(ctx)); +#endif CellInfo *bound = ctx->getBoundBelCell(newBel); if (bound != nullptr) 
ctx->unbindBel(newBel); diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 08e65f9b..dd948753 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -258,12 +258,24 @@ class HeAPPlacer ctx->bindBel(bel, cell, strength); } + for (auto cell : sorted(ctx->cells)) { + if (cell.second->bel == BelId()) + log_error("Found unbound cell %s\n", cell.first.c_str(ctx)); + if (ctx->getBoundBelCell(cell.second->bel) != cell.second) + log_error("Found cell %s with mismatched binding\n", cell.first.c_str(ctx)); + if (ctx->debug) + log_info("AP soln: %s -> %s\n", cell.first.c_str(ctx), ctx->getBelName(cell.second->bel).c_str(ctx)); + } + ctx->unlock(); auto endtt = std::chrono::high_resolution_clock::now(); log_info("HeAP Placer Time: %.02fs\n", std::chrono::duration(endtt - startt).count()); log_info(" of which solving equations: %.02fs\n", solve_time); log_info(" of which spreading cells: %.02fs\n", cl_time); log_info(" of which strict legalisation: %.02fs\n", sl_time); + + ctx->check(); + placer1_refine(ctx, Placer1Cfg(ctx)); return true; @@ -529,7 +541,7 @@ class HeAPPlacer cell_locs[child->name].y = std::min(max_y, base.y + child->constr_y); else cell_locs[child->name].y = base.y; // better handling of UNCONSTR? 
- chain_root[cell->name] = root; + chain_root[child->name] = root; if (!child->constr_children.empty()) update_chain(child, root); } @@ -697,7 +709,8 @@ class HeAPPlacer // Unbind all cells placed in this solution for (auto cell : sorted(ctx->cells)) { CellInfo *ci = cell.second; - if (ci->udata != dont_solve && ci->bel != BelId()) + if (ci->bel != BelId() && (ci->udata != dont_solve || + (chain_root.count(ci->name) && chain_root.at(ci->name)->udata != dont_solve))) ctx->unbindBel(ci->bel); } @@ -769,12 +782,13 @@ class HeAPPlacer break; } - if (ci->constr_children.empty()) { + if (ci->constr_children.empty() && !ci->constr_abs_z) { for (auto sz : fb.at(nx).at(ny)) { if (ctx->checkBelAvail(sz) || radius > 2) { CellInfo *bound = ctx->getBoundBelCell(sz); if (bound != nullptr) { - if (bound->constr_parent != nullptr || !bound->constr_children.empty()) + if (bound->constr_parent != nullptr || !bound->constr_children.empty() || + bound->constr_abs_z) continue; ctx->unbindBel(bound->bel); } @@ -817,8 +831,78 @@ class HeAPPlacer } } } else { - // FIXME - NPNR_ASSERT(false); + for (auto sz : fb.at(nx).at(ny)) { + Loc loc = ctx->getBelLocation(sz); + if (ci->constr_abs_z && loc.z != ci->constr_z) + continue; + std::vector> targets; + std::vector> swaps_made; + std::queue> visit; + visit.emplace(ci, loc); + while (!visit.empty()) { + CellInfo *vc = visit.front().first; + NPNR_ASSERT(vc->bel == BelId()); + Loc ploc = visit.front().second; + visit.pop(); + BelId target = ctx->getBelByLocation(ploc); + CellInfo *bound; + if (target == BelId()) + goto fail; + bound = ctx->getBoundBelCell(target); + // Chains cannot overlap + if (bound != nullptr) + if (bound->constr_z != bound->UNCONSTR || bound->constr_parent != nullptr || + !bound->constr_children.empty() || bound->belStrength > STRENGTH_WEAK) + goto fail; + targets.emplace_back(vc, target); + for (auto child : vc->constr_children) { + Loc cloc = ploc; + if (child->constr_x != child->UNCONSTR) + cloc.x += child->constr_x; + 
if (child->constr_y != child->UNCONSTR) + cloc.y += child->constr_y; + if (child->constr_z != child->UNCONSTR) + cloc.z = child->constr_abs_z ? child->constr_z : (ploc.z + child->constr_z); + visit.emplace(child, cloc); + } + } + + for (auto &target : targets) { + CellInfo *bound = ctx->getBoundBelCell(target.second); + if (bound != nullptr) + ctx->unbindBel(target.second); + ctx->bindBel(target.second, target.first, STRENGTH_STRONG); + swaps_made.emplace_back(target.second, bound); + } + + for (auto &sm : swaps_made) { + if (!ctx->isBelLocationValid(sm.first)) + goto fail; + } + + if (false) { + fail: + for (auto &swap : swaps_made) { + ctx->unbindBel(swap.first); + if (swap.second != nullptr) + ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK); + } + continue; + } + for (auto &target : targets) { + Loc loc = ctx->getBelLocation(target.second); + cell_locs[target.first->name].x = loc.x; + cell_locs[target.first->name].y = loc.y; + // log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z); + } + for (auto &swap : swaps_made) { + if (swap.second != nullptr) + remaining.emplace(chain_size[swap.second->name], swap.second->name); + } + + placed = true; + break; + } } } } @@ -1288,7 +1372,8 @@ class HeAPPlacer } } } - NPNR_ASSERT(best_tgt_cut != -1); + if (best_tgt_cut == -1) + return {}; left_bels = target_cut_bels.first; right_bels = target_cut_bels.second; // log_info("pivot %d target cut %d lc %d lb %d rc %d rb %d\n", pivot, best_tgt_cut, left_cells, left_bels, From 352f15e96b408dac40658ab058a7575d9daa85cb Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 30 Jan 2019 17:31:33 +0000 Subject: [PATCH 41/59] HeAP: Avoid getting stuck in legaliser ripup Signed-off-by: David Shah --- common/placer_heap.cc | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index dd948753..c9ec068d 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -720,7 
+720,8 @@ class HeAPPlacer for (auto cell : solve_cells) { remaining.emplace(chain_size[cell->name], cell->name); } - + int ripup_radius = 2; + int total_iters = 0; while (!remaining.empty()) { auto top = remaining.top(); remaining.pop(); @@ -729,7 +730,7 @@ class HeAPPlacer // Was now placed, ignore if (ci->bel != BelId()) continue; - // log_info(" Legalising %s\n", top.second.c_str(ctx)); + // log_info(" Legalising %s (%s)\n", top.second.c_str(ctx), ci->type.c_str(ctx)); int bt = std::get<0>(bel_types.at(ci->type)); auto &fb = fast_bels.at(bt); int radius = 0; @@ -739,6 +740,12 @@ class HeAPPlacer BelId bestBel; int best_inp_len = std::numeric_limits::max(); + total_iters++; + if (total_iters > int(solve_cells.size())) { + total_iters = 0; + ripup_radius = std::max(std::max(max_x, max_y), ripup_radius * 2); + } + while (!placed) { int nx = ctx->rng(2 * radius + 1) + std::max(cell_locs.at(ci->name).x - radius, 0); @@ -748,6 +755,22 @@ class HeAPPlacer iter_at_radius++; if (iter >= (10 * (radius + 1))) { radius = std::min(std::max(max_x, max_y), radius + 1); + while (radius < std::max(max_x, max_y)) { + for (int x = std::max(0, cell_locs.at(ci->name).x - radius); + x <= std::min(max_x, cell_locs.at(ci->name).x + radius); x++) { + if (x >= int(fb.size())) + break; + for (int y = std::max(0, cell_locs.at(ci->name).y - radius); + y <= std::min(max_y, cell_locs.at(ci->name).y + radius); y++) { + if (y >= int(fb.at(x).size())) + break; + if (fb.at(x).at(y).size() > 0) + goto notempty; + } + } + radius = std::min(std::max(max_x, max_y), radius + 1); + } + notempty: iter_at_radius = 0; iter = 0; } @@ -784,7 +807,7 @@ class HeAPPlacer if (ci->constr_children.empty() && !ci->constr_abs_z) { for (auto sz : fb.at(nx).at(ny)) { - if (ctx->checkBelAvail(sz) || radius > 2) { + if (ctx->checkBelAvail(sz) || radius > ripup_radius) { CellInfo *bound = ctx->getBoundBelCell(sz); if (bound != nullptr) { if (bound->constr_parent != nullptr || !bound->constr_children.empty() || From 
6cf001d35546fb05d0d3dec4a22bd48fecad0126 Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 30 Jan 2019 19:28:15 +0000 Subject: [PATCH 42/59] HeAP: Legaliser fixes Signed-off-by: David Shah --- common/placer_heap.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index c9ec068d..4f62e77b 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -807,7 +807,7 @@ class HeAPPlacer if (ci->constr_children.empty() && !ci->constr_abs_z) { for (auto sz : fb.at(nx).at(ny)) { - if (ctx->checkBelAvail(sz) || radius > ripup_radius) { + if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) { CellInfo *bound = ctx->getBoundBelCell(sz); if (bound != nullptr) { if (bound->constr_parent != nullptr || !bound->constr_children.empty() || @@ -869,7 +869,7 @@ class HeAPPlacer visit.pop(); BelId target = ctx->getBelByLocation(ploc); CellInfo *bound; - if (target == BelId()) + if (target == BelId() || ctx->getBelType(target) != vc->type) goto fail; bound = ctx->getBoundBelCell(target); // Chains cannot overlap From 8e4e03d98096604c11614bcb5bf89ba995ea5251 Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 30 Jan 2019 21:41:00 +0100 Subject: [PATCH 43/59] HeAP: Fix occupancy count Signed-off-by: David Shah --- common/placer_heap.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 4f62e77b..7a6c2a3e 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -1088,7 +1088,8 @@ class HeAPPlacer for (auto &cell : p->cell_locs) { if (ctx->cells.at(cell.first)->type != beltype) continue; - + if (ctx->cells.at(cell.first)->belStrength > STRENGTH_STRONG) + continue; occupancy.at(cell.second.x).at(cell.second.y)++; // Compute ultimate extent of each chain root if (p->chain_root.count(cell.first)) { @@ -1502,4 +1503,4 @@ int HeAPPlacer::CutSpreader::seq = 0; bool placer_heap(Context *ctx) { return 
HeAPPlacer(ctx).place(); } -NEXTPNR_NAMESPACE_END \ No newline at end of file +NEXTPNR_NAMESPACE_END From 589b267a93a92093a442cba9d1169c13e3f0a7c6 Mon Sep 17 00:00:00 2001 From: David Shah Date: Tue, 19 Feb 2019 22:00:01 +0000 Subject: [PATCH 44/59] HeAP: Fix regression Signed-off-by: David Shah --- common/placer_math.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/common/placer_math.c b/common/placer_math.c index 1aa74a9d..b36a9ec5 100644 --- a/common/placer_math.c +++ b/common/placer_math.c @@ -38,6 +38,8 @@ void taucif_finalise_matrix(struct taucif_system *sys) { } int taucif_solve_system(struct taucif_system *sys, double *x, double *rhs) { + if (sys->mat->n <= 2) + return 0; // FIXME: preconditioner, droptol?? taucs_ccs_matrix* precond_mat = taucs_ccs_factor_llt(sys->mat, 1e-2, 0); if (precond_mat == NULL) From 1c824709e21b18073bfdc182793351e40269c373 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 23 Feb 2019 17:33:47 +0000 Subject: [PATCH 45/59] HeAP: Switching from TAUCS to Eigen Signed-off-by: David Shah --- .cirrus/Dockerfile.ubuntu16.04 | 2 +- CMakeLists.txt | 11 ++++--- common/placer_heap.cc | 49 ++++++++++++++++------------- common/placer_math.c | 57 ---------------------------------- common/placer_math.h | 45 --------------------------- 5 files changed, 35 insertions(+), 129 deletions(-) delete mode 100644 common/placer_math.c delete mode 100644 common/placer_math.h diff --git a/.cirrus/Dockerfile.ubuntu16.04 b/.cirrus/Dockerfile.ubuntu16.04 index 7de6441e..0c8201b8 100644 --- a/.cirrus/Dockerfile.ubuntu16.04 +++ b/.cirrus/Dockerfile.ubuntu16.04 @@ -8,7 +8,7 @@ RUN set -e -x ;\ apt-get -y install \ build-essential autoconf cmake clang bison wget flex gperf \ libreadline-dev gawk tcl-dev libffi-dev graphviz xdot python3-dev \ - libboost-all-dev qt5-default git libftdi-dev pkg-config libopenblas-dev + libboost-all-dev qt5-default git libftdi-dev pkg-config libeigen3-dev RUN set -e -x ;\ mkdir -p /usr/local/src ;\ diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 2fbfa735..69089c4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_DEBUG /W4 /wd4100 /wd4244 set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /W4 /wd4100 /wd4244 /wd4125 /wd4800 /wd4456 /wd4458 /wd4305 /wd4459 /wd4121 /wd4996 /wd4127") else() set(CMAKE_CXX_FLAGS_DEBUG "-Wall -fPIC -ggdb -pipe") -set(CMAKE_CXX_FLAGS_RELEASE "-Wall -fPIC -O3 -g -pipe") +set(CMAKE_CXX_FLAGS_RELEASE "-Wall -fPIC -O3 -g -pipe -fopenmp") endif() set(CMAKE_DEFIN) @@ -122,8 +122,6 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/common/version.h.in ${CMAKE_CURRENT_BINARY_DIR}/generated/version.h ) -add_subdirectory(3rdparty/taucs ${CMAKE_CURRENT_BINARY_DIR}/generated/taucs EXCLUDE_FROM_ALL) - if (BUILD_PYTHON) # Find Boost::Python of a suitable version in a cross-platform way # Some distributions (Arch) call it libboost_python3, others such as Ubuntu @@ -182,7 +180,10 @@ if (BUILD_PYTHON) endif () endif() -include_directories(common/ json/ ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} 3rdparty/taucs/src ${CMAKE_CURRENT_BINARY_DIR}/generated/taucs/build/linux) +find_package (Eigen3 REQUIRED NO_MODULE) + +include_directories(common/ json/ ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIRS}) +add_definitions(${EIGEN3_DEFINITIONS}) aux_source_directory(common/ COMMON_SRC_FILES) aux_source_directory(json/ JSON_PARSER_FILES) set(COMMON_FILES ${COMMON_SRC_FILES} ${JSON_PARSER_FILES}) @@ -246,7 +247,7 @@ foreach (family ${ARCH}) # Include family-specific source files to all family targets and set defines appropriately target_include_directories(${target} PRIVATE ${family}/ ${CMAKE_CURRENT_BINARY_DIR}/generated/) target_compile_definitions(${target} PRIVATE NEXTPNR_NAMESPACE=nextpnr_${family} ARCH_${ufamily} ARCHNAME=${family}) - target_link_libraries(${target} LINK_PUBLIC ${Boost_LIBRARIES} taucs ${link_param}) + target_link_libraries(${target} LINK_PUBLIC ${Boost_LIBRARIES} 
${link_param}) if (NOT MSVC) target_link_libraries(${target} LINK_PUBLIC pthread) endif() diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 7a6c2a3e..b6913473 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -31,6 +31,8 @@ * - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2) */ +#include +#include #include #include #include @@ -44,7 +46,6 @@ #include "nextpnr.h" #include "place_common.h" #include "placer1.h" -#include "placer_math.h" #include "timing.h" #include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -55,6 +56,7 @@ namespace { // solves it, and the representation that requires template struct EquationSystem { + EquationSystem(size_t rows, size_t cols) { A.resize(cols); @@ -64,7 +66,6 @@ template struct EquationSystem // Simple sparse format, easy to convert to CCS for solver std::vector>> A; // col -> (row, x[row, col]) sorted by row std::vector rhs; // RHS vector - void reset() { for (auto &col : A) @@ -93,29 +94,35 @@ template struct EquationSystem void add_rhs(int row, T val) { rhs[row] += val; } - void solve(std::vector &x) + void solve(std::vector &x) { + using namespace Eigen; + NPNR_ASSERT(x.size() == A.size()); - int nnz = std::accumulate(A.begin(), A.end(), 0, - [](int a, const std::vector> &vec) { return a + int(vec.size()); }); - taucif_system *sys = taucif_create_system(int(rhs.size()), int(A.size()), nnz); + VectorXd vx(x.size()), vb(rhs.size()); + SparseMatrix mat(A.size(), A.size()); + + std::vector colnnz; + for (auto &Ac : A) + colnnz.push_back(int(Ac.size())); + mat.reserve(colnnz); for (int col = 0; col < int(A.size()); col++) { - auto &Ac = A[col]; - for (auto &el : Ac) { - if (col <= el.first) { - // log_info("%d %d %f\n", el.first, col, el.second); - taucif_add_matrix_value(sys, el.first, col, el.second); - } - - // FIXME: in debug mode, assert really is symmetric - } + auto &Ac = A.at(col); + for (auto &el : Ac) + mat.insert(el.first, col) = el.second; } - 
taucif_finalise_matrix(sys); - int result = taucif_solve_system(sys, x.data(), rhs.data()); - NPNR_ASSERT(result == 0); - taucif_free_system(sys); + for (int i = 0; i < int(x.size()); i++) + vx[i] = x.at(i); + for (int i = 0; i < int(rhs.size()); i++) + vb[i] = rhs.at(i); + + ConjugateGradient, Lower | Upper> solver; + solver.setTolerance(1e-5); + VectorXd xr = solver.compute(mat).solveWithGuess(vb, vx); + for (int i = 0; i < int(x.size()); i++) + x.at(i) = xr[i]; // for (int i = 0; i < int(x.size()); i++) // log_info("x[%d] = %f\n", i, x.at(i)); } @@ -126,13 +133,13 @@ template struct EquationSystem class HeAPPlacer { public: - HeAPPlacer(Context *ctx) : ctx(ctx) {} + HeAPPlacer(Context *ctx) : ctx(ctx) { Eigen::initParallel(); } + bool place() { auto startt = std::chrono::high_resolution_clock::now(); ctx->lock(); - taucif_init_solver(); place_constraints(); build_fast_bels(); seed_placement(); diff --git a/common/placer_math.c b/common/placer_math.c deleted file mode 100644 index b36a9ec5..00000000 --- a/common/placer_math.c +++ /dev/null @@ -1,57 +0,0 @@ -#include "taucs.h" -#include "placer_math.h" -#include -#include - -void taucif_init_solver() { - //taucs_logfile("stdout"); -} - -struct taucif_system { - taucs_ccs_matrix* mat; - int ccs_i, ccs_col; -}; - -struct taucif_system *taucif_create_system(int rows, int cols, int n_nonzero) { - struct taucif_system *sys = taucs_malloc(sizeof(struct taucif_system)); - sys->mat = taucs_ccs_create(cols, rows, n_nonzero, TAUCS_DOUBLE | TAUCS_SYMMETRIC | TAUCS_LOWER); - // Internal pointers - sys->ccs_i = 0; - sys->ccs_col = -1; - return sys; -}; - -void taucif_add_matrix_value(struct taucif_system *sys, int row, int col, double value) { - assert(sys->ccs_col <= col); - while(sys->ccs_col < col) { - sys->mat->colptr[++sys->ccs_col] = sys->ccs_i; - } - sys->mat->rowind[sys->ccs_i] = row; - sys->mat->values.d[sys->ccs_i++] = value; -} - -void taucif_finalise_matrix(struct taucif_system *sys) { - 
sys->mat->colptr[++sys->ccs_col] = sys->ccs_i; -#if 0 - taucs_ccs_write_ijv(sys->mat, "matrix.ijv"); -#endif -} - -int taucif_solve_system(struct taucif_system *sys, double *x, double *rhs) { - if (sys->mat->n <= 2) - return 0; - // FIXME: preconditioner, droptol?? - taucs_ccs_matrix* precond_mat = taucs_ccs_factor_llt(sys->mat, 1e-2, 0); - if (precond_mat == NULL) - return -1; - // FIXME: itermax, convergetol - int cjres = taucs_conjugate_gradients(sys->mat, taucs_ccs_solve_llt, precond_mat, x, rhs, 1000, 1e-6); - taucs_ccs_free(precond_mat); - return 0; -} - -void taucif_free_system(struct taucif_system *sys) { - taucs_ccs_free(sys->mat); - taucs_free(sys); -} - diff --git a/common/placer_math.h b/common/placer_math.h deleted file mode 100644 index c197036c..00000000 --- a/common/placer_math.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * nextpnr -- Next Generation Place and Route - * - * Copyright (C) 2019 David Shah - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- - */ - -#ifndef PLACER_MATH_H -#define PLACER_MATH_H -// This shim is needed because Tauc is mutually incompatible with modern C++ (implementing macros and functions -// that collide with max, min, etc) -#ifdef __cplusplus -extern "C" { -#endif -extern void taucif_init_solver(); - -struct taucif_system; - -extern struct taucif_system *taucif_create_system(int rows, int cols, int n_nonzero); - -extern void taucif_add_matrix_value(struct taucif_system *sys, int row, int col, double value); - -extern void taucif_finalise_matrix(struct taucif_system *sys); - -extern int taucif_solve_system(struct taucif_system *sys, double *x, double *rhs); - -extern void taucif_free_system(struct taucif_system *sys); - -#ifdef __cplusplus -} -#endif - -#endif \ No newline at end of file From 7142db28a8b828da557729a706c20c8f330ba129 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 11:56:10 +0000 Subject: [PATCH 46/59] HeAP: Make HeAP placer optional A CMake option 'BUILD_HEAP' (default on) configures building of the HeAP placer and the associated Eigen3 dependency. Default for the iCE40 is SA placer, with --heap-placer to use HeAP Default for the ECP5 is HeAP placer, as SA placer can take 1hr+ for large ECP5 designs and HeAP tends to give better QoR. --sa-placer can be used to use SA instead, and auto-fallback to SA if HeAP not built. 
Signed-off-by: David Shah --- CMakeLists.txt | 27 +++++++++++++++++++-------- common/placer_heap.cc | 18 +++++++++++++++++- ecp5/arch.cc | 19 ++++++++++++++++++- ecp5/main.cc | 8 +++++++- ice40/arch.cc | 11 +++++++---- ice40/main.cc | 5 ++++- 6 files changed, 72 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 69089c4c..ade76d60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,8 @@ project(nextpnr) option(BUILD_GUI "Build GUI" ON) option(BUILD_PYTHON "Build Python Integration" ON) option(BUILD_TESTS "Build GUI" OFF) +option(BUILD_HEAP "Build HeAP analytic placer" ON) +option(USE_OPENMP "Use OpenMP to accelerate analytic placer" OFF) option(COVERAGE "Add code coverage info" OFF) option(STATIC_BUILD "Create static build" OFF) option(EXTERNAL_CHIPDB "Create build with pre-built chipdb binaries" OFF) @@ -53,12 +55,16 @@ endforeach() set(CMAKE_CXX_STANDARD 11) if (MSVC) -set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE) -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_DEBUG /W4 /wd4100 /wd4244 /wd4125 /wd4800 /wd4456 /wd4458 /wd4305 /wd4459 /wd4121 /wd4996") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /W4 /wd4100 /wd4244 /wd4125 /wd4800 /wd4456 /wd4458 /wd4305 /wd4459 /wd4121 /wd4996 /wd4127") + set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE) + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_DEBUG /W4 /wd4100 /wd4244 /wd4125 /wd4800 /wd4456 /wd4458 /wd4305 /wd4459 /wd4121 /wd4996") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /W4 /wd4100 /wd4244 /wd4125 /wd4800 /wd4456 /wd4458 /wd4305 /wd4459 /wd4121 /wd4996 /wd4127") else() -set(CMAKE_CXX_FLAGS_DEBUG "-Wall -fPIC -ggdb -pipe") -set(CMAKE_CXX_FLAGS_RELEASE "-Wall -fPIC -O3 -g -pipe -fopenmp") + set(CMAKE_CXX_FLAGS_DEBUG "-Wall -fPIC -ggdb -pipe") + if (USE_OPENMP) + set(CMAKE_CXX_FLAGS_RELEASE "-Wall -fPIC -O3 -g -pipe -fopenmp") + else() + set(CMAKE_CXX_FLAGS_RELEASE "-Wall -fPIC -O3 -g -pipe") + 
endif() endif() set(CMAKE_DEFIN) @@ -180,10 +186,15 @@ if (BUILD_PYTHON) endif () endif() -find_package (Eigen3 REQUIRED NO_MODULE) +include_directories(common/ json/ ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}) + +if(BUILD_HEAP) + find_package (Eigen3 REQUIRED NO_MODULE) + include_directories(${EIGEN3_INCLUDE_DIRS}) + add_definitions(${EIGEN3_DEFINITIONS}) + add_definitions(-DWITH_HEAP) +endif() -include_directories(common/ json/ ${Boost_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIRS}) -add_definitions(${EIGEN3_DEFINITIONS}) aux_source_directory(common/ COMMON_SRC_FILES) aux_source_directory(json/ JSON_PARSER_FILES) set(COMMON_FILES ${COMMON_SRC_FILES} ${JSON_PARSER_FILES}) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index b6913473..255a3f54 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -31,6 +31,8 @@ * - To make the placer timing-driven, the bound2bound weights are multiplied by (1 + 10 * crit^2) */ +#ifdef WITH_HEAP + #include #include #include @@ -1509,5 +1511,19 @@ class HeAPPlacer int HeAPPlacer::CutSpreader::seq = 0; bool placer_heap(Context *ctx) { return HeAPPlacer(ctx).place(); } - NEXTPNR_NAMESPACE_END + +#else + +#include "log.h" +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN +bool placer_heap(Context *ctx) +{ + log_error("nextpnr was built without the HeAP placer\n"); + return false; +} +NEXTPNR_NAMESPACE_END + +#endif diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 5ea6a7c3..8385e57b 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -457,6 +457,7 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const auto src_loc = est_location(src), dst_loc = est_location(dst); int dx = abs(src_loc.first - dst_loc.first), dy = abs(src_loc.second - dst_loc.second); + return (130 - 25 * args.speed) * (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); @@ -486,6 +487,7 @@ delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const } int dx = abs(driver_loc.x - 
sink_loc.x), dy = abs(driver_loc.y - sink_loc.y); + return (130 - 25 * args.speed) * (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) + 2 * (std::min(dx, 5) + std::min(dy, 5))); } @@ -505,7 +507,22 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // ----------------------------------------------------------------------- -bool Arch::place() { return placer_heap(getCtx()); } +bool Arch::place() +{ + // HeAP is the default unless overriden or not built +#ifdef WITH_HEAP + if (bool_or_default(settings, id("sa_placer"), false)) { +#endif + if (!placer1(getCtx(), Placer1Cfg(getCtx()))) + return false; +#ifdef WITH_HEAP + } else { + if (!placer_heap(getCtx())) + return false; + } +#endif + return true; +} bool Arch::route() { diff --git a/ecp5/main.cc b/ecp5/main.cc index 15027a5a..de279e63 100644 --- a/ecp5/main.cc +++ b/ecp5/main.cc @@ -59,6 +59,8 @@ po::options_description ECP5CommandHandler::getArchOptions() specific.add_options()("um5g-45k", "set device type to LFE5UM5G-45F"); specific.add_options()("um5g-85k", "set device type to LFE5UM5G-85F"); + specific.add_options()("sa-placer", "use pure simulated annealing placer instead of HeAP analytic placer"); + specific.add_options()("package", po::value(), "select device package (defaults to CABGA381)"); specific.add_options()("speed", po::value(), "select device speedgrade (6, 7 or 8)"); @@ -149,8 +151,12 @@ std::unique_ptr ECP5CommandHandler::createContext() chipArgs.speed = ArchArgs::SPEED_6; } } + auto ctx = std::unique_ptr(new Context(chipArgs)); - return std::unique_ptr(new Context(chipArgs)); + if (vm.count("sa-placer")) + ctx->settings[ctx->id("sa_placer")] = "1"; + + return ctx; } void ECP5CommandHandler::customAfterLoad(Context *ctx) diff --git a/ice40/arch.cc b/ice40/arch.cc index 5688b6e6..09e64b16 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -671,10 +671,13 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay bool Arch::place() { - // if 
(!placer1(getCtx(), Placer1Cfg(getCtx()))) - // return false; - if (!placer_heap(getCtx())) - return false; + if (bool_or_default(settings, id("heap_placer"), false)) { + if (!placer_heap(getCtx())) + return false; + } else { + if (!placer1(getCtx(), Placer1Cfg(getCtx()))) + return false; + } if (bool_or_default(settings, id("opt_timing"), false)) { TimingOptCfg tocfg(getCtx()); tocfg.cellTypes.insert(id_ICESTORM_LC); diff --git a/ice40/main.cc b/ice40/main.cc index 2313c2ae..7233f169 100644 --- a/ice40/main.cc +++ b/ice40/main.cc @@ -69,6 +69,8 @@ po::options_description Ice40CommandHandler::getArchOptions() specific.add_options()("promote-logic", "enable promotion of 'logic' globals (in addition to clk/ce/sr by default)"); specific.add_options()("no-promote-globals", "disable all global promotion"); + specific.add_options()("heap-placer", + "use HeAP analytic placer instead of simulated annealing (faster, experimental)"); specific.add_options()("opt-timing", "run post-placement timing optimisation pass (experimental)"); specific.add_options()("tmfuzz", "run path delay estimate fuzzer"); specific.add_options()("pcf-allow-unconstrained", "don't require PCF to constrain all IO"); @@ -176,7 +178,8 @@ std::unique_ptr Ice40CommandHandler::createContext() ctx->settings[ctx->id("opt_timing")] = "1"; if (vm.count("pcf-allow-unconstrained")) ctx->settings[ctx->id("pcf_allow_unconstrained")] = "1"; - + if (vm.count("heap-placer")) + ctx->settings[ctx->id("heap_placer")] = "1"; return ctx; } From bd12c0a4866e0d67bedd1c40f1205e9168b5d428 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Feb 2019 12:48:01 +0000 Subject: [PATCH 47/59] HeAP: Add PlacerHeapCfg Signed-off-by: David Shah --- common/placer_heap.cc | 28 +++++++++++++++++++++------- common/placer_heap.h | 15 ++++++++++++++- common/timing.cc | 3 ++- ecp5/arch.cc | 7 +++++-- ice40/arch.cc | 4 +++- 5 files changed, 45 insertions(+), 12 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 
255a3f54..037d8e47 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -33,6 +33,7 @@ #ifdef WITH_HEAP +#include "placer_heap.h" #include #include #include @@ -135,7 +136,7 @@ template struct EquationSystem class HeAPPlacer { public: - HeAPPlacer(Context *ctx) : ctx(ctx) { Eigen::initParallel(); } + HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) : ctx(ctx), cfg(cfg) { Eigen::initParallel(); } bool place() { @@ -292,6 +293,7 @@ class HeAPPlacer private: Context *ctx; + PlacerHeapCfg cfg; int max_x = 0, max_y = 0; std::vector>>> fast_bels; @@ -497,8 +499,7 @@ class HeAPPlacer cell_locs[cell.first].locked = false; cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel); // FIXME - if (has_connectivity(cell.second) && cell.second->type != ctx->id("SB_IO") && - cell.second->type != ctx->id("TRELLIS_IO")) { + if (has_connectivity(cell.second) && !cfg.ioBufTypes.count(ci->type)) { place_cells.push_back(ci); placed = true; } else { @@ -640,7 +641,8 @@ class HeAPPlacer if (user_idx != -1 && net_crit.count(ni->name)) { auto &nc = net_crit.at(ni->name); if (user_idx < int(nc.criticality.size())) - weight *= (1.0 + 10 * std::pow(nc.criticality.at(user_idx), 2)); + weight *= (1.0 + cfg.timingWeight * + std::pow(nc.criticality.at(user_idx), cfg.criticalityExponent)); } // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, @@ -655,7 +657,7 @@ class HeAPPlacer }); } if (iter != -1) { - const float alpha = 0.1; + float alpha = cfg.alpha; for (size_t row = 0; row < solve_cells.size(); row++) { int l_pos = legal_pos(solve_cells.at(row)); int c_pos = cell_pos(solve_cells.at(row)); @@ -1510,20 +1512,32 @@ class HeAPPlacer }; int HeAPPlacer::CutSpreader::seq = 0; -bool placer_heap(Context *ctx) { return HeAPPlacer(ctx).place(); } +bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { return HeAPPlacer(ctx, cfg).place(); } + +PlacerHeapCfg::PlacerHeapCfg(Context *ctx) : Settings(ctx) +{ + alpha = get("placerHeap/alpha", 0.1); + 
criticalityExponent = get("placerHeap/criticalityExponent", 2); + timingWeight = get("placerHeap/timingWeight", 10); +} + NEXTPNR_NAMESPACE_END #else #include "log.h" #include "nextpnr.h" +#include "placer_heap.h" NEXTPNR_NAMESPACE_BEGIN -bool placer_heap(Context *ctx) +bool placer_heap(Context *ctx, PlacerHeapCfg cfg) { log_error("nextpnr was built without the HeAP placer\n"); return false; } + +PlacerHeapCfg::PlacerHeapCfg(Context *ctx) : Settings(ctx) {} + NEXTPNR_NAMESPACE_END #endif diff --git a/common/placer_heap.h b/common/placer_heap.h index 5eb8a9ba..75c90c19 100644 --- a/common/placer_heap.h +++ b/common/placer_heap.h @@ -27,8 +27,21 @@ #ifndef PLACER_HEAP_H #define PLACER_HEAP #include "nextpnr.h" +#include "settings.h" NEXTPNR_NAMESPACE_BEGIN -extern bool placer_heap(Context *ctx); + +struct PlacerHeapCfg : public Settings +{ + PlacerHeapCfg(Context *ctx); + + float alpha; + float criticalityExponent; + float timingWeight; + + std::unordered_set ioBufTypes; +}; + +extern bool placer_heap(Context *ctx, PlacerHeapCfg cfg); NEXTPNR_NAMESPACE_END #endif \ No newline at end of file diff --git a/common/timing.cc b/common/timing.cc index 17adc078..2a0af874 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -611,7 +611,8 @@ struct Timing continue; delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay; for (size_t i = 0; i < net->users.size(); i++) { - float criticality = 1.0f - (float(nc.slack.at(i) - worst_slack.at(startdomain.first)) / dmax); + float criticality = + 1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax); nc.criticality.at(i) = std::min(1.0, std::max(0.0, criticality)); } nc.max_path_length = nd.max_path_length; diff --git a/ecp5/arch.cc b/ecp5/arch.cc index 8385e57b..938b5f8e 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -460,7 +460,6 @@ delay_t Arch::estimateDelay(WireId src, WireId dst) const return (130 - 25 * args.speed) * (6 + std::max(dx - 5, 0) + std::max(dy - 5, 0) 
+ 2 * (std::min(dx, 5) + std::min(dy, 5))); - } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const @@ -517,10 +516,14 @@ bool Arch::place() return false; #ifdef WITH_HEAP } else { - if (!placer_heap(getCtx())) + PlacerHeapCfg cfg(getCtx()); + cfg.criticalityExponent = 7; + cfg.ioBufTypes.insert(id_TRELLIS_IO); + if (!placer_heap(getCtx(), cfg)) return false; } #endif + permute_luts(); return true; } diff --git a/ice40/arch.cc b/ice40/arch.cc index 09e64b16..ce824c83 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -672,7 +672,9 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay bool Arch::place() { if (bool_or_default(settings, id("heap_placer"), false)) { - if (!placer_heap(getCtx())) + PlacerHeapCfg cfg(getCtx()); + cfg.ioBufTypes.insert(id_SB_IO); + if (!placer_heap(getCtx(), cfg)) return false; } else { if (!placer1(getCtx(), Placer1Cfg(getCtx()))) From f8a38c59f89b5f432dbd968ac577a4190cec7358 Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 27 Feb 2019 11:18:39 +0000 Subject: [PATCH 48/59] common: Add early return path to getNetinfoRouteDelay for fully unrouted nets Signed-off-by: David Shah --- common/nextpnr.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/common/nextpnr.cc b/common/nextpnr.cc index b0cbbbeb..54333b15 100644 --- a/common/nextpnr.cc +++ b/common/nextpnr.cc @@ -221,6 +221,9 @@ delay_t Context::getNetinfoRouteDelay(const NetInfo *net_info, const PortRef &us return 0; #endif + if (net_info->wires.empty()) + return predictDelay(net_info, user_info); + WireId src_wire = getNetinfoSourceWire(net_info); if (src_wire == WireId()) return 0; From fcc3bb14959e96073f736050f4085b42589ea9a7 Mon Sep 17 00:00:00 2001 From: David Shah Date: Wed, 27 Feb 2019 11:57:36 +0000 Subject: [PATCH 49/59] ecp5: Speedup cell delay lookups Signed-off-by: David Shah --- ecp5/arch.cc | 8 +++++++- ecp5/arch.h | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git 
a/ecp5/arch.cc b/ecp5/arch.cc index 938b5f8e..d4b53f47 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -620,6 +620,11 @@ DecalXY Arch::getGroupDecal(GroupId pip) const { return {}; }; bool Arch::getDelayFromTimingDatabase(IdString tctype, IdString from, IdString to, DelayInfo &delay) const { + auto fnd_dk = celldelay_cache.find({tctype, from, to}); + if (fnd_dk != celldelay_cache.end()) { + delay = fnd_dk->second.second; + return fnd_dk->second.first; + } for (int i = 0; i < speed_grade->num_cell_timings; i++) { const auto &tc = speed_grade->cell_timings[i]; if (tc.cell_type == tctype.index) { @@ -628,9 +633,11 @@ bool Arch::getDelayFromTimingDatabase(IdString tctype, IdString from, IdString t if (dly.from_port == from.index && dly.to_port == to.index) { delay.max_delay = dly.max_delay; delay.min_delay = dly.min_delay; + celldelay_cache[{tctype, from, to}] = std::make_pair(true, delay); return true; } } + celldelay_cache[{tctype, from, to}] = std::make_pair(false, DelayInfo()); return false; } } @@ -660,7 +667,6 @@ void Arch::getSetupHoldFromTimingDatabase(IdString tctype, IdString clock, IdStr bool Arch::getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort, DelayInfo &delay) const { - // Data for -8 grade if (cell->type == id_TRELLIS_SLICE) { bool has_carry = cell->sliceInfo.is_carry; diff --git a/ecp5/arch.h b/ecp5/arch.h index ab4a4e00..b19e008f 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -448,6 +448,27 @@ struct ArchArgs } speed = SPEED_6; }; +struct DelayKey { + IdString celltype, from, to; + inline bool operator==(const DelayKey &other) const { + return celltype == other.celltype && from == other.from && to == other.to; + } +}; + +NEXTPNR_NAMESPACE_END +namespace std { + template<> + struct hash { + std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX DelayKey &dk) const noexcept { + std::size_t seed = std::hash()(dk.celltype); + seed ^= std::hash()(dk.from) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= std::hash()(dk.to) + 
0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } + }; +} +NEXTPNR_NAMESPACE_BEGIN + struct Arch : BaseCtx { const ChipInfoPOD *chip_info; @@ -1019,6 +1040,8 @@ struct Arch : BaseCtx IdString id_clk, id_lsr; IdString id_clkmux, id_lsrmux; IdString id_srmode, id_mode; + + mutable std::unordered_map> celldelay_cache; }; NEXTPNR_NAMESPACE_END From 801675a2c631b859c4668ada12422bf42aee2db8 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 1 Mar 2019 15:25:16 +0000 Subject: [PATCH 50/59] placer1: Only get criticalities when in timing-driven mode Signed-off-by: David Shah --- common/placer1.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/placer1.cc b/common/placer1.cc index 9b4b066e..9ebf3ee6 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -361,7 +361,7 @@ class SAPlacer } // Invoke timing analysis to obtain criticalities - if (!cfg.budgetBased) + if (!cfg.budgetBased && ctx->timing_driven) get_criticalities(ctx, &net_crit); // Need to rebuild costs after criticalities change setup_costs(); From 23f2fff1c83eed5b80421c485cf887cf2d232f73 Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 22 Mar 2019 10:39:05 +0000 Subject: [PATCH 51/59] clangformat Signed-off-by: David Shah --- common/placer1.cc | 2 +- common/timing.cc | 5 ++--- ecp5/arch.h | 27 +++++++++++++++------------ gui/fpgaviewwidget.cc | 10 +++++----- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 9ebf3ee6..0c918f33 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -318,7 +318,7 @@ class SAPlacer "%.0f, wirelen = %.0f, dia = %d, Ra = %.02f \n", iter, temp, double(curr_timing_cost), double(curr_wirelen_cost), diameter, Raccept); - if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { + if (curr_wirelen_cost < 0.95 * avg_wirelen && curr_wirelen_cost > 0) { avg_wirelen = 0.8 * avg_wirelen + 0.2 * curr_wirelen_cost; } else { double diam_next = diameter * (1.0 - 0.44 + Raccept); diff 
--git a/common/timing.cc b/common/timing.cc index 2a0af874..2ce9eea3 100644 --- a/common/timing.cc +++ b/common/timing.cc @@ -904,10 +904,9 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p if (!warn_on_failure || passed) log_info("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "", clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target); - else - if (bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false)) + else if (bool_or_default(ctx->settings, ctx->id("timing/allowFail"), false)) log_warning("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "", - clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target); + clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target); else log_nonfatal_error("Max frequency for clock %*s'%s': %.02f MHz (%s at %.02f MHz)\n", width, "", clock_name.c_str(), clock_fmax[clock.first], passed ? "PASS" : "FAIL", target); diff --git a/ecp5/arch.h b/ecp5/arch.h index b19e008f..2e86988a 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -448,25 +448,28 @@ struct ArchArgs } speed = SPEED_6; }; -struct DelayKey { +struct DelayKey +{ IdString celltype, from, to; - inline bool operator==(const DelayKey &other) const { + inline bool operator==(const DelayKey &other) const + { return celltype == other.celltype && from == other.from && to == other.to; } }; NEXTPNR_NAMESPACE_END namespace std { - template<> - struct hash { - std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX DelayKey &dk) const noexcept { - std::size_t seed = std::hash()(dk.celltype); - seed ^= std::hash()(dk.from) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - seed ^= std::hash()(dk.to) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - return seed; - } - }; -} +template <> struct hash +{ + std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX DelayKey &dk) const noexcept + { + std::size_t seed = std::hash()(dk.celltype); + seed ^= 
std::hash()(dk.from) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= std::hash()(dk.to) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + } +}; +} // namespace std NEXTPNR_NAMESPACE_BEGIN struct Arch : BaseCtx diff --git a/gui/fpgaviewwidget.cc b/gui/fpgaviewwidget.cc index c932c3e7..5eab20ed 100644 --- a/gui/fpgaviewwidget.cc +++ b/gui/fpgaviewwidget.cc @@ -645,10 +645,10 @@ void FPGAViewWidget::mousePressEvent(QMouseEvent *event) return; bool shift = QApplication::keyboardModifiers().testFlag(Qt::ShiftModifier); - bool ctrl = QApplication::keyboardModifiers().testFlag(Qt::ControlModifier); + bool ctrl = QApplication::keyboardModifiers().testFlag(Qt::ControlModifier); bool btn_right = event->buttons() & Qt::RightButton; - bool btn_mid = event->buttons() & Qt::MidButton; - bool btn_left = event->buttons() & Qt::LeftButton; + bool btn_mid = event->buttons() & Qt::MidButton; + bool btn_left = event->buttons() & Qt::LeftButton; if (btn_right || btn_mid || (btn_left && shift)) { lastDragPos_ = event->pos(); @@ -687,8 +687,8 @@ void FPGAViewWidget::mouseMoveEvent(QMouseEvent *event) bool shift = QApplication::keyboardModifiers().testFlag(Qt::ShiftModifier); bool btn_right = event->buttons() & Qt::RightButton; - bool btn_mid = event->buttons() & Qt::MidButton; - bool btn_left = event->buttons() & Qt::LeftButton; + bool btn_mid = event->buttons() & Qt::MidButton; + bool btn_left = event->buttons() & Qt::LeftButton; if (btn_right || btn_mid || (btn_left && shift)) { const int dx = event->x() - lastDragPos_.x(); From 81a7b5d2a113960f35111a2948fcc2804251821c Mon Sep 17 00:00:00 2001 From: David Shah Date: Fri, 22 Mar 2019 10:46:54 +0000 Subject: [PATCH 52/59] Update README.md Signed-off-by: David Shah --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5b79d1fb..0417e6f7 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ of the selected architecture: - Python 3.5 or later, including 
development libraries (`python3-dev` for Ubuntu) - on Windows make sure to install same version as supported by [vcpkg](https://github.com/Microsoft/vcpkg/blob/master/ports/python3/CONTROL) - Boost libraries (`libboost-dev libboost-filesystem-dev libboost-thread-dev libboost-program-options-dev libboost-python-dev libboost-dev` or `libboost-all-dev` for Ubuntu) +- Eigen3 (`libeigen3-dev` for Ubuntu) is required to build the analytic placer - Latest git Yosys is required to synthesise the demo design - For building on Windows with MSVC, usage of vcpkg is advised for dependency installation. - For 32 bit builds: `vcpkg install boost-filesystem boost-program-options boost-thread boost-python qt5-base` @@ -119,11 +120,11 @@ Use cmake `-D` options to specify which version of nextpnr you want to build. Use `-DARCH=...` to set the architecture. It is a semicolon separated list. Use `cmake . -DARCH=all` to build all supported architectures. -The following runs a debug build of the iCE40 architecture without GUI -and without Python support and only HX1K support: +The following runs a debug build of the iCE40 architecture without GUI, + without Python support, without the HeAP analytic placer and only HX1K support: ``` -cmake -DARCH=ice40 -DCMAKE_BUILD_TYPE=Debug -DBUILD_PYTHON=OFF -DBUILD_GUI=OFF -DICE40_HX1K_ONLY=1 . +cmake -DARCH=ice40 -DCMAKE_BUILD_TYPE=Debug -DBUILD_PYTHON=OFF -DBUILD_GUI=OFF -DBUILD_HEAP=OFF -DICE40_HX1K_ONLY=1 . make -j$(nproc) ``` @@ -134,6 +135,9 @@ cmake -DARCH=ice40 -DBUILD_PYTHON=OFF -DBUILD_GUI=OFF -DSTATIC_BUILD=ON . make -j$(nproc) ``` +The HeAP placer's solver can optionally use OpenMP for a speedup on very large designs. Enable this by passing +`-DUSE_OPENMP=yes` to cmake (compiler support may vary). + You can change the location where nextpnr will be installed (this will usually default to `/usr/local`) by using `-DCMAKE_INSTALL_PREFIX=/install/prefix`. 
From e04efa8c6eb54563702c41591526cd7356ed9013 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 23 Mar 2019 20:51:57 +0000 Subject: [PATCH 53/59] Bump tests to include WAIVE --- tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests b/tests index f29dcbe1..32a68307 160000 --- a/tests +++ b/tests @@ -1 +1 @@ -Subproject commit f29dcbe187b517d01964b1074eb7ff0b90849eed +Subproject commit 32a683071758ee59d47e2c5cb29c87882993facd From 52e05f4a0706b1c108221e600ff11e654f6e85a5 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sat, 23 Mar 2019 23:15:05 +0100 Subject: [PATCH 54/59] placer1: Fix regression moving chained cells pre-legalise Signed-off-by: David Shah --- common/placer1.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index 0c918f33..99b24640 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -475,11 +475,12 @@ class SAPlacer { static const double epsilon = 1e-20; moveChange.reset(); - if (is_constrained(cell)) + if (!require_legal && is_constrained(cell)) return false; BelId oldBel = cell->bel; CellInfo *other_cell = ctx->getBoundBelCell(newBel); - if (other_cell != nullptr && (is_constrained(other_cell) || other_cell->belStrength > STRENGTH_WEAK)) { + if (!require_legal && other_cell != nullptr && + (is_constrained(other_cell) || other_cell->belStrength > STRENGTH_WEAK)) { return false; } int old_dist = get_constraints_distance(ctx, cell); From 02ae21d8fc3bc1375848f40702cd4bb7f6700595 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 24 Mar 2019 11:10:20 +0000 Subject: [PATCH 55/59] Add --placer option and refactor placer selection Signed-off-by: David Shah --- common/command.cc | 19 +++++++++++++++++++ common/settings.h | 27 +++++++++++++++++++-------- docs/archapi.md | 11 +++++++++++ ecp5/arch.cc | 30 +++++++++++++++++++++--------- ecp5/arch.h | 3 +++ ecp5/main.cc | 6 ------ generic/arch.cc | 15 ++++++++++++++- generic/arch.h | 3 +++ ice40/arch.cc | 15 
+++++++++++++-- ice40/arch.h | 3 +++ ice40/main.cc | 4 ---- 11 files changed, 106 insertions(+), 30 deletions(-) diff --git a/common/command.cc b/common/command.cc index b7fc13e6..6f4137fe 100644 --- a/common/command.cc +++ b/common/command.cc @@ -27,6 +27,7 @@ #include "pybindings.h" #endif +#include #include #include #include @@ -120,6 +121,13 @@ po::options_description CommandHandler::getGeneralOptions() general.add_options()("json", po::value(), "JSON design file to ingest"); general.add_options()("seed", po::value(), "seed value for random number generator"); general.add_options()("randomize-seed,r", "randomize seed value for random number generator"); + + general.add_options()( + "placer", po::value(), + std::string("placer algorithm to use; available: " + boost::algorithm::join(Arch::availablePlacers, ", ") + + "; default: " + Arch::defaultPlacer) + .c_str()); + general.add_options()("slack_redist_iter", po::value(), "number of iterations between slack redistribution"); general.add_options()("cstrweight", po::value(), "placer weighting for relative constraint satisfaction"); general.add_options()("starttemp", po::value(), "placer SA start temperature"); @@ -186,6 +194,17 @@ void CommandHandler::setupContext(Context *ctx) settings->set("timing/allowFail", true); } + if (vm.count("placer")) { + std::string placer = vm["placer"].as(); + if (std::find(Arch::availablePlacers.begin(), Arch::availablePlacers.end(), placer) == + Arch::availablePlacers.end()) + log_error("Placer algorithm '%s' is not supported (available options: %s)\n", placer.c_str(), + boost::algorithm::join(Arch::availablePlacers, ", ").c_str()); + settings->set("placer", placer); + } else { + settings->set("placer", Arch::defaultPlacer); + } + if (vm.count("cstrweight")) { settings->set("placer1/constraintWeight", vm["cstrweight"].as()); } diff --git a/common/settings.h b/common/settings.h index 0c4a67db..b57947c9 100644 --- a/common/settings.h +++ b/common/settings.h @@ -45,19 +45,30 @@ class 
Settings return defaultValue; } - template void set(const char *name, T value) - { - IdString id = ctx->id(name); - auto pair = ctx->settings.emplace(id, std::to_string(value)); - if (!pair.second) { - ctx->settings[pair.first->first] = value; - } - } + template void set(const char *name, T value); private: Context *ctx; }; +template inline void Settings::set(const char *name, T value) +{ + IdString id = ctx->id(name); + auto pair = ctx->settings.emplace(id, std::to_string(value)); + if (!pair.second) { + ctx->settings[pair.first->first] = value; + } +} + +template <> inline void Settings::set(const char *name, std::string value) +{ + IdString id = ctx->id(name); + auto pair = ctx->settings.emplace(id, value); + if (!pair.second) { + ctx->settings[pair.first->first] = value; + } +} + NEXTPNR_NAMESPACE_END #endif // SETTINGS_H diff --git a/docs/archapi.md b/docs/archapi.md index 3c938865..6e59ecdb 100644 --- a/docs/archapi.md +++ b/docs/archapi.md @@ -490,3 +490,14 @@ a certain number of different clock signals allowed for a group of bels. Returns true if a bell in the current configuration is valid, i.e. if `isValidBelForCell()` would return true for the current mapping. + + +### static const std::string defaultPlacer + +Name of the default placement algorithm for the architecture, if +`--placer` isn't specified on the command line. + +### static const std::vector\ availablePlacers + +Name of available placer algorithms for the architecture, used +to provide help for and validate `--placer`. 
\ No newline at end of file diff --git a/ecp5/arch.cc b/ecp5/arch.cc index d4b53f47..9da8abdf 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -508,21 +508,21 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay bool Arch::place() { - // HeAP is the default unless overriden or not built -#ifdef WITH_HEAP - if (bool_or_default(settings, id("sa_placer"), false)) { -#endif - if (!placer1(getCtx(), Placer1Cfg(getCtx()))) - return false; -#ifdef WITH_HEAP - } else { + std::string placer = str_or_default(settings, id("placer"), defaultPlacer); + + if (placer == "heap") { PlacerHeapCfg cfg(getCtx()); cfg.criticalityExponent = 7; cfg.ioBufTypes.insert(id_TRELLIS_IO); if (!placer_heap(getCtx(), cfg)) return false; + } else if (placer == "sa") { + if (!placer1(getCtx(), Placer1Cfg(getCtx()))) + return false; + } else { + log_error("ECP5 architecture does not support placer '%s'\n", placer.c_str()); } -#endif + permute_luts(); return true; } @@ -986,4 +986,16 @@ WireId Arch::getBankECLK(int bank, int eclk) return getWireByLocAndBasename(Location(0, 0), "G_BANK" + std::to_string(bank) + "ECLK" + std::to_string(eclk)); } +#ifdef WITH_HEAP +const std::string Arch::defaultPlacer = "heap"; +#else +const std::string Arch::defaultPlacer = "sa"; +#endif + +const std::vector Arch::availablePlacers = {"sa", +#ifdef WITH_HEAP + "heap" +#endif +}; + NEXTPNR_NAMESPACE_END diff --git a/ecp5/arch.h b/ecp5/arch.h index 2e86988a..3de06a42 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -1045,6 +1045,9 @@ struct Arch : BaseCtx IdString id_srmode, id_mode; mutable std::unordered_map> celldelay_cache; + + static const std::string defaultPlacer; + static const std::vector availablePlacers; }; NEXTPNR_NAMESPACE_END diff --git a/ecp5/main.cc b/ecp5/main.cc index de279e63..bb18aa58 100644 --- a/ecp5/main.cc +++ b/ecp5/main.cc @@ -59,8 +59,6 @@ po::options_description ECP5CommandHandler::getArchOptions() specific.add_options()("um5g-45k", "set device type to 
LFE5UM5G-45F"); specific.add_options()("um5g-85k", "set device type to LFE5UM5G-85F"); - specific.add_options()("sa-placer", "use pure simulated annealing placer instead of HeAP analytic placer"); - specific.add_options()("package", po::value(), "select device package (defaults to CABGA381)"); specific.add_options()("speed", po::value(), "select device speedgrade (6, 7 or 8)"); @@ -152,10 +150,6 @@ std::unique_ptr ECP5CommandHandler::createContext() } } auto ctx = std::unique_ptr(new Context(chipArgs)); - - if (vm.count("sa-placer")) - ctx->settings[ctx->id("sa_placer")] = "1"; - return ctx; } diff --git a/generic/arch.cc b/generic/arch.cc index 77417d27..aca81559 100644 --- a/generic/arch.cc +++ b/generic/arch.cc @@ -21,6 +21,7 @@ #include "nextpnr.h" #include "placer1.h" #include "router1.h" +#include "util.h" NEXTPNR_NAMESPACE_BEGIN @@ -439,7 +440,16 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay // --------------------------------------------------------------- -bool Arch::place() { return placer1(getCtx(), Placer1Cfg(getCtx())); } +bool Arch::place() +{ + std::string placer = str_or_default(settings, id("placer"), defaultPlacer); + // FIXME: No HeAP because it needs a list of IO buffers + if (placer == "sa") { + return placer1(getCtx(), Placer1Cfg(getCtx())); + } else { + log_error("Generic architecture does not support placer '%s'\n", placer.c_str()); + } +} bool Arch::route() { return router1(getCtx(), Router1Cfg(getCtx())); } @@ -476,4 +486,7 @@ TimingClockingInfo Arch::getPortClockingInfo(const CellInfo *cell, IdString port bool Arch::isValidBelForCell(CellInfo *cell, BelId bel) const { return true; } bool Arch::isBelLocationValid(BelId bel) const { return true; } +const std::string Arch::defaultPlacer = "sa"; +const std::vector Arch::availablePlacers = {"sa"}; + NEXTPNR_NAMESPACE_END diff --git a/generic/arch.h b/generic/arch.h index dc4258cc..5b5d8c55 100644 --- a/generic/arch.h +++ b/generic/arch.h @@ -240,6 +240,9 
@@ struct Arch : BaseCtx bool isValidBelForCell(CellInfo *cell, BelId bel) const; bool isBelLocationValid(BelId bel) const; + + static const std::string defaultPlacer; + static const std::vector availablePlacers; }; NEXTPNR_NAMESPACE_END diff --git a/ice40/arch.cc b/ice40/arch.cc index ce824c83..b0839fa5 100644 --- a/ice40/arch.cc +++ b/ice40/arch.cc @@ -671,14 +671,17 @@ bool Arch::getBudgetOverride(const NetInfo *net_info, const PortRef &sink, delay bool Arch::place() { - if (bool_or_default(settings, id("heap_placer"), false)) { + std::string placer = str_or_default(settings, id("placer"), defaultPlacer); + if (placer == "heap") { PlacerHeapCfg cfg(getCtx()); cfg.ioBufTypes.insert(id_SB_IO); if (!placer_heap(getCtx(), cfg)) return false; - } else { + } else if (placer == "sa") { if (!placer1(getCtx(), Placer1Cfg(getCtx()))) return false; + } else { + log_error("iCE40 architecture does not support placer '%s'\n", placer.c_str()); } if (bool_or_default(settings, id("opt_timing"), false)) { TimingOptCfg tocfg(getCtx()); @@ -1205,4 +1208,12 @@ void Arch::assignCellInfo(CellInfo *cell) } } +const std::string Arch::defaultPlacer = "sa"; + +const std::vector Arch::availablePlacers = {"sa", +#ifdef WITH_HEAP + "heap" +#endif +}; + NEXTPNR_NAMESPACE_END diff --git a/ice40/arch.h b/ice40/arch.h index 706043b2..ea29f4f1 100644 --- a/ice40/arch.h +++ b/ice40/arch.h @@ -897,6 +897,9 @@ struct Arch : BaseCtx IdString glb_net = getWireName(getBelPinWire(bel, id_GLOBAL_BUFFER_OUTPUT)); return std::stoi(std::string("") + glb_net.str(this).back()); } + + static const std::string defaultPlacer; + static const std::vector availablePlacers; }; void ice40DelayFuzzerMain(Context *ctx); diff --git a/ice40/main.cc b/ice40/main.cc index 7233f169..9b79a08c 100644 --- a/ice40/main.cc +++ b/ice40/main.cc @@ -69,8 +69,6 @@ po::options_description Ice40CommandHandler::getArchOptions() specific.add_options()("promote-logic", "enable promotion of 'logic' globals (in addition to clk/ce/sr by 
default)"); specific.add_options()("no-promote-globals", "disable all global promotion"); - specific.add_options()("heap-placer", - "use HeAP analytic placer instead of simulated annealing (faster, experimental)"); specific.add_options()("opt-timing", "run post-placement timing optimisation pass (experimental)"); specific.add_options()("tmfuzz", "run path delay estimate fuzzer"); specific.add_options()("pcf-allow-unconstrained", "don't require PCF to constrain all IO"); @@ -178,8 +176,6 @@ std::unique_ptr Ice40CommandHandler::createContext() ctx->settings[ctx->id("opt_timing")] = "1"; if (vm.count("pcf-allow-unconstrained")) ctx->settings[ctx->id("pcf_allow_unconstrained")] = "1"; - if (vm.count("heap-placer")) - ctx->settings[ctx->id("heap_placer")] = "1"; return ctx; } From 00b09fbb435fa5bece77c95435cb66e1f05ddca8 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 24 Mar 2019 11:18:38 +0000 Subject: [PATCH 56/59] HeAP: Per-iteration output all on one line Signed-off-by: David Shah --- common/placer_heap.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 037d8e47..26a49c6d 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -164,7 +164,7 @@ class HeAPPlacer log_info("Initial placer iter %d, hpwl = %d\n", i, int(hpwl)); } - wirelen_t solved_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); + wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); int iter = 0, stalled = 0; std::vector> solution; @@ -213,23 +213,21 @@ class HeAPPlacer solve_time += std::chrono::duration(solve_endt - solve_startt).count(); update_all_chains(); solved_hpwl = total_hpwl(); - log_info("Solved HPWL = %d\n", int(solved_hpwl)); update_all_chains(); for (auto type : sorted(run)) CutSpreader(this, type).run(); update_all_chains(); - legal_hpwl = total_hpwl(); - log_info("Spread HPWL = %d\n", int(legal_hpwl)); + spread_hpwl = 
total_hpwl(); legalise_placement_strict(true); update_all_chains(); legal_hpwl = total_hpwl(); - log_info("Legalised HPWL = %d\n", int(legal_hpwl)); auto run_stopt = std::chrono::high_resolution_clock::now(); - log_info(" %s runtime: %.02fs\n", (run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), - std::chrono::duration(run_stopt - run_startt).count()); + log_info("Iter %d type %s: HPWL solved = %d, spread = %d, legal = %d; time = %.02fs\n", iter, + (run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), int(solved_hpwl), int(spread_hpwl), + int(legal_hpwl), std::chrono::duration(run_stopt - run_startt).count()); } if (ctx->timing_driven) From efc687234181d6ab464230d3844313c2fcf51282 Mon Sep 17 00:00:00 2001 From: David Shah Date: Sun, 24 Mar 2019 13:59:27 +0000 Subject: [PATCH 57/59] placer1: Restore old weighting in budget-based mode Signed-off-by: David Shah --- common/placer1.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/placer1.cc b/common/placer1.cc index 99b24640..a71cbc9a 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -740,7 +740,7 @@ class SAPlacer return 0; if (cfg.budgetBased) { double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); - return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget))); + return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget) / 10)); } else { auto crit = net_crit.find(net->name); if (crit == net_crit.end() || crit->second.criticality.empty()) From 49e827a9b43a5b073d848f63fe4d4c86e8ca01e6 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Mar 2019 12:41:25 +0000 Subject: [PATCH 58/59] HeAP: Add missing newline Signed-off-by: David Shah --- common/placer_heap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/placer_heap.h b/common/placer_heap.h index 75c90c19..f94489c1 100644 --- a/common/placer_heap.h +++ b/common/placer_heap.h @@ -44,4 +44,4 @@ struct PlacerHeapCfg : public Settings extern 
bool placer_heap(Context *ctx, PlacerHeapCfg cfg); NEXTPNR_NAMESPACE_END -#endif \ No newline at end of file +#endif From 25e3350675c091c2fb54e51c9fcb7e79bbe6e279 Mon Sep 17 00:00:00 2001 From: David Shah Date: Mon, 25 Mar 2019 15:39:15 +0000 Subject: [PATCH 59/59] HeAP: Make log output more consistent Signed-off-by: David Shah --- common/placer1.cc | 1 + common/placer_heap.cc | 13 ++++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/common/placer1.cc b/common/placer1.cc index a71cbc9a..98251627 100644 --- a/common/placer1.cc +++ b/common/placer1.cc @@ -242,6 +242,7 @@ class SAPlacer } require_legal = false; diameter = 3; + log_info("Running simulated annealing placer for refinement.\n"); } auto saplace_start = std::chrono::high_resolution_clock::now(); diff --git a/common/placer_heap.cc b/common/placer_heap.cc index 26a49c6d..7caf536d 100644 --- a/common/placer_heap.cc +++ b/common/placer_heap.cc @@ -148,7 +148,8 @@ class HeAPPlacer seed_placement(); update_all_chains(); wirelen_t hpwl = total_hpwl(); - log_info("Initial placer starting hpwl = %d\n", int(hpwl)); + log_info("Creating initial analytic placement for %d cells, random placement wirelen = %d.\n", + int(place_cells.size()), int(hpwl)); for (int i = 0; i < 4; i++) { setup_solve_cells(); auto solve_startt = std::chrono::high_resolution_clock::now(); @@ -161,7 +162,7 @@ class HeAPPlacer update_all_chains(); hpwl = total_hpwl(); - log_info("Initial placer iter %d, hpwl = %d\n", i, int(hpwl)); + log_info(" at initial placer iter %d, wirelen = %d\n", i, int(hpwl)); } wirelen_t solved_hpwl = 0, spread_hpwl = 0, legal_hpwl = 0, best_hpwl = std::numeric_limits::max(); @@ -190,6 +191,7 @@ class HeAPPlacer heap_runs.push_back(all_celltypes); // The main HeAP placer loop + log_info("Running main analytical placer.\n"); while (stalled < 5 && (solved_hpwl <= legal_hpwl * 0.8)) { // Alternate between particular Bel types and all bels for (auto &run : heap_runs) { @@ -225,9 +227,10 @@ class 
HeAPPlacer legal_hpwl = total_hpwl(); auto run_stopt = std::chrono::high_resolution_clock::now(); - log_info("Iter %d type %s: HPWL solved = %d, spread = %d, legal = %d; time = %.02fs\n", iter, - (run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), int(solved_hpwl), int(spread_hpwl), - int(legal_hpwl), std::chrono::duration(run_stopt - run_startt).count()); + log_info(" at iteration #%d, type %s: wirelen solved = %d, spread = %d, legal = %d; time = %.02fs\n", + iter + 1, (run.size() > 1 ? "ALL" : run.begin()->c_str(ctx)), int(solved_hpwl), + int(spread_hpwl), int(legal_hpwl), + std::chrono::duration(run_stopt - run_startt).count()); } if (ctx->timing_driven)