nextpnr/common/place/placer_static.cc
2024-10-08 15:20:33 +02:00

1541 lines
60 KiB
C++

/*
* nextpnr -- Next Generation Place and Route
*
* Copyright (C) 2022 gatecat <gatecat@ds0.me>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include "placer_static.h"
#include "static_util.h"
#include <boost/optional.hpp>
#include <chrono>
#include <deque>
#include <fstream>
#include <numeric>
#include <queue>
#include <tuple>
#include "array2d.h"
#include "fast_bels.h"
#include "log.h"
#include "nextpnr.h"
#include "parallel_refine.h"
#include "place_common.h"
#include "placer1.h"
#include "scope_lock.h"
#include "timing.h"
#include "util.h"
#include "fftsg.h"
#ifndef NPNR_DISABLE_THREADS
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
#endif
NEXTPNR_NAMESPACE_BEGIN
using namespace StaticUtil;
namespace {
struct PlacerGroup
{
bool enabled = true;
int total_bels = 0;
double concrete_area = 0;
double dark_area = 0;
double total_area = 0;
array2d<float> loc_area;
float overlap = 0;
array2d<double> conc_density; // excludes fillers and dark nodes
array2d<double> density;
// FFT related data (TODO: should these be per group?)
FFTArray density_fft;
FFTArray electro_phi;
FFTArray electro_fx, electro_fy;
double init_potential = 0;
double curr_potential = 0;
};
// Could be an actual concrete netlist cell; or just a spacer
struct MoveCell
{
StaticRect rect;
// TODO: multiple contiguous vectors is probably faster than an array of structs, but also messier
RealPair pos, ref_pos, last_pos, last_ref_pos;
RealPair ref_wl_grad, wl_grad, last_wl_grad;
RealPair ref_dens_grad, dens_grad, last_dens_grad;
RealPair ref_total_grad, total_grad, last_total_grad;
int32_t pin_count;
int16_t group;
int16_t bx, by; // bins
bool is_fixed : 1;
bool is_spacer : 1;
bool is_dark : 1;
};
// Extra data for cells that aren't spacers
struct ConcreteCell
{
CellInfo *base_cell;
// When cells are macros; we split them up into chunks
// based on dx/dy location
int32_t macro_idx = -1;
int16_t chunk_dx = 0, chunk_dy = 0;
};
struct ClusterGroupKey
{
ClusterGroupKey(int dx = 0, int dy = 0, int group = -1) : dx(dx), dy(dy), group(group) {};
bool operator==(const ClusterGroupKey &other) const
{
return dx == other.dx && dy == other.dy && group == other.group;
}
unsigned hash() const { return mkhash(mkhash(dx, dy), group); }
int16_t dx, dy, group;
};
struct PlacerMacro
{
CellInfo *root;
std::vector<int32_t> conc_cells;
dict<ClusterGroupKey, std::vector<CellInfo *>> cells;
};
struct PlacerBin
{
float density;
// ...
};
struct PlacerPort
{
// for wirelength data
static constexpr float invalid = std::numeric_limits<float>::lowest();
PortRef ref;
RealPair max_exp{invalid, invalid};
RealPair min_exp{invalid, invalid};
bool has_max_exp(Axis axis) const { return max_exp.at(axis) != invalid; }
bool has_min_exp(Axis axis) const { return min_exp.at(axis) != invalid; }
};
struct PlacerNet
{
NetInfo *ni;
bool skip = false;
RealPair b1, b0; // real bounding box
RealPair min_exp, x_min_exp;
RealPair max_exp, x_max_exp;
RealPair wa_wl;
// lines up with user indexes; plus one for driver
std::vector<PlacerPort> ports;
int hpwl() { return (b1.x - b0.x) + (b1.y - b0.y); }
};
#ifdef NPNR_DISABLE_THREADS
struct ThreadPool
{
ThreadPool(int) {};
void run(int N, std::function<void(int)> func)
{
for (int i = 0; i < N; i++)
func(i);
};
};
#else
struct ThreadPool
{
ThreadPool(int thread_count)
{
done.resize(thread_count, false);
for (int i = 0; i < thread_count; i++) {
threads.emplace_back([this, i]() { this->worker(i); });
}
}
std::vector<std::thread> threads;
std::condition_variable cv_start, cv_done;
std::mutex mutex;
bool work_available = false;
bool shutdown = false;
std::vector<bool> done;
std::function<void(int)> work;
int work_count;
~ThreadPool()
{
{
std::lock_guard lk(mutex);
shutdown = true;
}
cv_start.notify_all();
for (auto &t : threads)
t.join();
}
void run(int N, std::function<void(int)> func)
{
{
std::lock_guard lk(mutex);
work = func;
work_count = N;
work_available = true;
std::fill(done.begin(), done.end(), false);
}
cv_start.notify_all();
{
std::unique_lock lk(mutex);
cv_done.wait(lk, [this] { return std::all_of(done.begin(), done.end(), [](bool x) { return x; }); });
work_available = false;
}
}
void worker(int idx)
{
while (true) {
std::unique_lock lk(mutex);
cv_start.wait(lk, [this, idx] { return (work_available && !done.at(idx)) || shutdown; });
if (shutdown) {
lk.unlock();
break;
} else if (work_available && !done.at(idx)) {
int work_per_thread = (work_count + int(threads.size()) - 1) / threads.size();
int begin = work_per_thread * idx;
int end = std::min(work_count, work_per_thread * (idx + 1));
lk.unlock();
for (int j = begin; j < end; j++) {
work(j);
}
lk.lock();
done.at(idx) = true;
lk.unlock();
cv_done.notify_one();
}
}
}
};
#endif
class StaticPlacer
{
Context *ctx;
PlacerStaticCfg cfg;
std::vector<MoveCell> mcells;
std::vector<ConcreteCell> ccells;
std::vector<PlacerMacro> macros;
std::vector<PlacerGroup> groups;
std::vector<PlacerNet> nets;
idict<ClusterId> cluster2idx;
FastBels fast_bels;
TimingAnalyser tmg;
ThreadPool pool;
int width, height;
int iter = 0;
bool fft_debug = false;
bool dump_density = false;
// legalisation queue
std::priority_queue<std::pair<int, IdString>> to_legalise;
void prepare_cells()
{
for (auto &cell : ctx->cells) {
CellInfo *ci = cell.second.get();
ci->udata = -1;
// process legacy-ish bel attributes
if (ci->attrs.count(ctx->id("BEL")) && ci->bel == BelId()) {
std::string loc_name = ci->attrs.at(ctx->id("BEL")).as_string();
BelId bel = ctx->getBelByNameStr(loc_name);
NPNR_ASSERT(ctx->isValidBelForCellType(ci->type, bel));
NPNR_ASSERT(ctx->checkBelAvail(bel));
ctx->bindBel(bel, ci, STRENGTH_USER);
}
}
}
bool lookup_group(IdString type, int &group, StaticRect &rect)
{
for (size_t i = 0; i < cfg.cell_groups.size(); i++) {
const auto &g = cfg.cell_groups.at(i);
if (g.cell_area.count(type)) {
group = i;
rect = g.cell_area.at(type);
return true;
}
}
return false;
}
void init_bels()
{
log_info("⌁ initialising bels...\n");
width = 0;
height = 0;
for (auto bel : ctx->getBels()) {
Loc loc = ctx->getBelLocation(bel);
width = std::max(width, loc.x + 1);
height = std::max(height, loc.y + 1);
}
dict<IdString, int> beltype2group;
for (int i = 0; i < int(groups.size()); i++) {
groups.at(i).loc_area.reset(width, height);
for (const auto &bel_type : cfg.cell_groups.at(i).bel_area)
beltype2group[bel_type.first] = i;
}
for (auto bel : ctx->getBels()) {
Loc loc = ctx->getBelLocation(bel);
IdString type = ctx->getBelType(bel);
auto fnd = beltype2group.find(type);
if (fnd == beltype2group.end())
continue;
auto size = cfg.cell_groups.at(fnd->second).bel_area.at(type); // TODO: do we care about dimensions too
auto &group = groups.at(fnd->second);
for (int dy = 0; dy <= int(size.h); dy++) {
for (int dx = 0; dx <= int(size.w); dx++) {
float h = (dy == int(size.h)) ? (size.h - int(size.h)) : 1;
float w = (dx == int(size.w)) ? (size.w - int(size.w)) : 1;
group.loc_area.at(loc.x + dx, loc.y + dy) += w * h;
}
}
group.total_area += size.area();
group.total_bels += 1;
}
}
void init_nets()
{
nets.reserve(ctx->nets.size());
for (auto &net : ctx->nets) {
NetInfo *ni = net.second.get();
ni->udata = nets.size();
nets.emplace_back();
auto &nd = nets.back();
nd.ni = ni;
nd.skip = (ni->driver.cell == nullptr); // (or global buffer?)
nd.ports.resize(ni->users.capacity() + 1); // +1 for the driver
nd.ports.back().ref = ni->driver;
for (auto usr : ni->users.enumerate()) {
nd.ports.at(usr.index.idx()).ref = usr.value;
}
}
}
int add_cell(StaticRect rect, int group, RealPair pos, CellInfo *ci = nullptr)
{
int idx = mcells.size();
mcells.emplace_back();
auto &m = mcells.back();
m.rect = rect;
m.group = group;
m.pos = pos;
if (ci) {
// Is a concrete cell (might be a macro, in which case ci is just one of them...)
// Can't add concrete cells once we have spacers (we define it such that indices line up between mcells and
// ccells; spacer cells only exist in mcells)
NPNR_ASSERT(idx == int(ccells.size()));
ccells.emplace_back();
auto &c = ccells.back();
c.base_cell = ci;
groups.at(group).concrete_area += rect.area();
} else {
// Is a spacer cell
m.is_spacer = true;
}
return idx;
}
const float pi = 3.141592653589793f;
RealPair rand_loc()
{
// Box-muller
float u1 = ctx->rngf(1.0f);
while (u1 < 1e-5)
u1 = ctx->rngf(1.0f);
float u2 = ctx->rngf(1.0f);
float m = std::sqrt(-2.f * std::log(u1));
float z0 = m * std::cos(2.f * pi * u2);
float z1 = m * std::sin(2.f * pi * u2);
float x = (width / 2.f) + (width / 250.f) * z0;
float y = (height / 2.f) + (height / 250.f) * z1;
x = std::min<float>(width - 1.f, std::max<float>(x, 0));
y = std::min<float>(height - 1.f, std::max<float>(y, 0));
return RealPair(x, y);
}
RealPair cell_loc(CellInfo *ci, bool ref)
{
if (ci->udata == -1) {
// not handled?
NPNR_ASSERT_MSG(ci->bel != BelId(),
stringf("Cell %s of type %s has no bel", ci->name.c_str(ctx), ci->type.c_str(ctx))
.c_str()); // already fixed
return RealPair(ctx->getBelLocation(ci->bel), 0.5f);
} else {
return ref ? mcells.at(ci->udata).ref_pos : mcells.at(ci->udata).pos;
}
}
void init_cells()
{
log_info("⌁ initialising cells...\n");
// Process non-clustered cells and find clusters
for (auto &cell : ctx->cells) {
CellInfo *ci = cell.second.get();
int cell_group;
StaticRect rect;
// Mismatched group case
if (!lookup_group(ci->type, cell_group, rect)) {
if (ci->bel == BelId()) {
for (auto bel : ctx->getBels()) {
if (ctx->isValidBelForCellType(ci->type, bel) && ctx->checkBelAvail(bel)) {
ctx->bindBel(bel, ci, STRENGTH_STRONG);
if (!ctx->isBelLocationValid(bel)) {
ctx->unbindBel(bel);
} else {
log_info(" placed potpourri cell '%s' at bel '%s'\n", ctx->nameOf(ci),
ctx->nameOfBel(bel));
break;
}
}
}
}
continue;
}
if (ci->cluster != ClusterId()) {
// Defer processing of macro clusters
int c_idx = cluster2idx(ci->cluster);
if (c_idx >= int(macros.size())) {
macros.emplace_back();
macros.back().root = ctx->getClusterRootCell(ci->cluster);
}
auto &m = macros.at(c_idx);
Loc delta = ctx->getClusterOffset(ci);
m.cells[ClusterGroupKey(delta.x, delta.y, cell_group)].push_back(ci);
} else {
// Non-clustered cells can be processed already
int idx = add_cell(rect, cell_group, rand_loc(), ci);
ci->udata = idx;
auto &mc = mcells.at(idx);
mc.pin_count += int(ci->ports.size());
if (ci->bel != BelId()) {
// Currently; treat all ready-placed cells as fixed (eventually we might do incremental ripups
// here...)
Loc loc = ctx->getBelLocation(ci->bel);
mc.pos.x = loc.x + 0.5;
mc.pos.y = loc.y + 0.5;
mc.is_fixed = true;
}
}
}
// Process clustered cells
for (int i = 0; i < int(macros.size()); i++) {
auto &m = macros.at(i);
for (auto &kv : m.cells) {
const auto &g = cfg.cell_groups.at(kv.first.group);
// Only treat zero-area cells as zero-area; if this cluster also contains non-zero area cells
bool has_nonzero = std::any_of(kv.second.begin(), kv.second.end(),
[&](const CellInfo *ci) { return !g.zero_area_cells.count(ci->type); });
StaticRect cluster_size;
for (auto ci : kv.second) {
if (has_nonzero && g.zero_area_cells.count(ci->type))
continue;
// Compute an equivalent-area stacked rectange for cells in this cluster group.
// There are probably some ugly cases this handles badly.
StaticRect r = g.cell_area.at(ci->type);
if (r.w > r.h) {
// Long and thin, "stack" vertically
// Compute height we add to stack
if (cluster_size.w < r.w) {
cluster_size.h *= (cluster_size.w / r.w);
cluster_size.w = r.w;
}
cluster_size.h += ((r.w * r.h) / cluster_size.w);
} else {
// "stack" horizontally
if (cluster_size.h < r.h) {
cluster_size.w *= (cluster_size.h / r.h);
cluster_size.h = r.h;
}
cluster_size.w += ((r.w * r.h) / cluster_size.h);
}
}
// Now add the moveable cell
if (cluster_size.area() > 0) {
int idx = add_cell(cluster_size, kv.first.group, rand_loc(), kv.second.front());
auto &mc = mcells.at(idx);
if (kv.second.front()->bel != BelId()) {
// Currently; treat all ready-placed cells as fixed (eventually we might do incremental ripups
// here...)
Loc loc = ctx->getBelLocation(kv.second.front()->bel);
mc.pos.x = loc.x + 0.5;
mc.pos.y = loc.y + 0.5;
mc.is_fixed = true;
}
for (auto ci : kv.second) {
ci->udata = idx;
mc.pin_count += int(ci->ports.size());
}
auto &cc = ccells.at(idx);
cc.macro_idx = i;
cc.chunk_dx = kv.first.dx;
cc.chunk_dy = kv.first.dy;
m.conc_cells.push_back(idx);
}
}
}
}
const double target_util = 0.7;
void insert_dark()
{
log_info("⌁ inserting dark nodes...\n");
for (int group = 0; group < int(groups.size()); group++) {
const auto &cg = cfg.cell_groups.at(group);
auto &g = groups.at(group);
int dark_count = 0;
for (auto tile : g.loc_area) {
if (tile.value > 0.5f)
continue;
StaticRect dark_area(1.0f, 1.0f - tile.value);
int cell_idx = add_cell(dark_area, group, RealPair(tile.x, tile.y), nullptr /*spacer*/);
mcells.at(cell_idx).is_dark = true;
++dark_count;
}
log_info("⌁ group %s inserted a total of %d dark nodes\n", ctx->nameOf(cg.name), dark_count);
}
}
void insert_spacer()
{
log_info("⌁ inserting spacers...\n");
int inserted_spacers = 0;
for (int group = 0; group < int(groups.size()); group++) {
const auto &cg = cfg.cell_groups.at(group);
const auto &g = groups.at(group);
double util = g.concrete_area / g.total_area;
log_info("⌁ group %s pre-spacer utilisation %.02f%% (target %.02f%%)\n", ctx->nameOf(cg.name),
(util * 100.0), (target_util * 100.0));
// TODO: better computation of spacer size and placement?
int spacer_count = (g.total_area * target_util - g.concrete_area) / cg.spacer_rect.area();
if (spacer_count <= 0)
continue;
while (inserted_spacers < spacer_count) {
int x = ctx->rng(width);
int y = ctx->rng(height);
// avoid placing spacers at locations with dark nodes
if (ctx->rngf(1.0) > g.loc_area.at(x, y))
continue;
add_cell(cg.spacer_rect, group, RealPair(x + ctx->rngf(1.0), y + ctx->rngf(1.0)), nullptr /*spacer*/);
++inserted_spacers;
}
}
log_info("⌁ inserted a total of %d spacers\n", inserted_spacers);
}
// TODO: dark node insertion when we have obstructions or non-rectangular placement regions
int m;
double bin_w, bin_h;
std::vector<float> cs_table_fft;
std::vector<int> work_area_fft;
void prepare_density_bins()
{
// TODO: a m x m grid follows the paper and makes the DCTs easier, but is it actually ideal for non-square
// FPGAs?
m = 1 << int(std::ceil(std::log2(std::max(width, height))));
bin_w = double(width) / m;
bin_h = double(height) / m;
for (auto &g : groups) {
g.density.reset(m, m, 0);
g.density_fft.reset(m, m, 0);
g.electro_phi.reset(m, m, 0);
g.electro_fx.reset(m, m, 0);
g.electro_fy.reset(m, m, 0);
}
cs_table_fft.resize(m * 3 / 2, 0);
work_area_fft.resize(std::round(std::sqrt(m)) + 2, 0);
work_area_fft.at(0) = 0;
}
template <typename TFunc> void iter_slithers(RealPair pos, StaticRect rect, TFunc func)
{
// compute the stamp over bins (this could probably be more efficient?)
double width = rect.w, height = rect.h;
double scaled_density = 1.0;
if (width < bin_w) {
scaled_density *= (width / bin_w);
width = bin_w;
}
if (height < bin_h) {
scaled_density *= (height / bin_h);
height = bin_h;
}
double x0 = pos.x, x1 = pos.x + width;
double y0 = pos.y, y1 = pos.y + height;
for (int y = int(y0 / bin_h); y <= int(y1 / bin_h); y++) {
for (int x = int(x0 / bin_w); x <= int(x1 / bin_w); x++) {
if (x < 0 || x >= m || y < 0 || y >= m)
continue;
double slither_w = 1.0, slither_h = 1.0;
if (y == int(y0 / bin_h)) // y slithers
slither_h = ((y + 1) * bin_h) - y0;
else if (y == int(y1 / bin_h))
slither_h = (y1 - y * bin_h);
if (x == int(x0 / bin_w)) // x slithers
slither_w = ((x + 1) * bin_w) - x0;
else if (x == int(x1 / bin_w))
slither_w = (x1 - x * bin_w);
func(x, y, scaled_density * slither_w * slither_h);
}
}
};
void compute_density(int group, bool ref)
{
auto &g = groups.at(group);
// reset
for (auto entry : g.density)
entry.value = 0;
// populate
for (int idx = 0; idx < int(mcells.size()); idx++) {
auto &mc = mcells.at(idx);
if (mc.group != group)
continue;
// scale width and height to be at least one bin (local density smoothing from the eplace paper)
// TODO: should we really do this every iteration?
auto pos = ref ? mc.ref_pos : mc.pos;
iter_slithers(pos, mc.rect, [&](int x, int y, float area) { g.density.at(x, y) += area; });
}
}
void compute_conc_density()
{
for (auto &g : groups)
g.conc_density.reset(width, height, 0);
for (int idx = 0; idx < int(ccells.size()); idx++) {
auto &mc = mcells.at(idx);
auto &g = groups.at(mc.group);
auto loc = mc.pos;
auto size = mc.rect;
for (int dy = 0; dy <= int(size.h); dy++) {
for (int dx = 0; dx <= int(size.w); dx++) {
float h = (dy == int(size.h)) ? (size.h - int(size.h)) : 1;
float w = (dx == int(size.w)) ? (size.w - int(size.w)) : 1;
if ((loc.x + dx) >= 0 && (loc.x + dx) < width && (loc.y + dy) >= 0 && (loc.y + dy) < height)
g.conc_density.at(loc.x + dx, loc.y + dy) += w * h;
}
}
}
}
void compute_overlap()
{
// populate for concrete cells only
compute_conc_density();
std::string overlap_str = "";
for (int idx = 0; idx < int(groups.size()); idx++) {
auto &g = groups.at(idx);
g.overlap = 0;
float total_area = 0;
for (auto tile : g.loc_area) {
// g.overlap += std::max<float>(0, g.conc_density.at(tile.x, tile.y) - tile.value);
g.overlap += std::max<float>(0, g.conc_density.at(tile.x, tile.y) - 1);
total_area += g.conc_density.at(tile.x, tile.y);
}
g.overlap /= std::max(1.0f, total_area);
if (!overlap_str.empty())
overlap_str += ", ";
overlap_str += stringf("%s=%.1f%%", cfg.cell_groups.at(idx).name.c_str(ctx), g.overlap * 100);
if (dump_density)
g.conc_density.write_csv(stringf("out_conc_density_%d_%d.csv", iter, idx));
}
log_info("overlap: %s\n", overlap_str.c_str());
}
void run_fft(int group)
{
// get data into form that fft wants
auto &g = groups.at(group);
for (auto entry : g.density)
g.density_fft.at(entry.x, entry.y) = entry.value;
if (fft_debug || dump_density)
g.density_fft.write_csv(stringf("out_bin_density_%d_%d.csv", iter, group));
// Based on
// https://github.com/ALIGN-analoglayout/ALIGN-public/blob/master/PlaceRouteHierFlow/EA_placer/FFT/fft.cpp
// initial DCT for coefficients
ddct2d(m, m, -1, g.density_fft.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
// postprocess coefficients
for (int x = 0; x < m; x++)
g.density_fft.at(x, 0) *= 0.5f;
for (int y = 0; y < m; y++)
g.density_fft.at(0, y) *= 0.5f;
for (int x = 0; x < m; x++)
for (int y = 0; y < m; y++)
g.density_fft.at(x, y) *= (4.0f / (m * m));
// scale inputs to IDCT for potentials and field
for (int x = 0; x < m; x++) {
float wx = pi * (x / float(m));
float wx2 = wx * wx;
for (int y = 0; y < m; y++) {
float wy = pi * (y / float(m));
float wy2 = wy * wy;
float dens = g.density_fft.at(x, y);
float phi = 0, ex = 0, ey = 0;
if (x != 0 || y != 0) { // avoid divide by zero...
phi = dens / (wx2 + wy2);
ex = phi * wx;
ey = phi * wy;
}
g.electro_phi.at(x, y) = phi;
g.electro_fx.at(x, y) = ex;
g.electro_fy.at(x, y) = ey;
}
}
// IDCT for potential; 2D derivatives for field
ddct2d(m, m, 1, g.electro_phi.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
ddsct2d(m, m, 1, g.electro_fx.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
ddcst2d(m, m, 1, g.electro_fy.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
if (fft_debug) {
g.electro_phi.write_csv(stringf("out_bin_phi_%d_%d.csv", iter, group));
g.electro_fx.write_csv(stringf("out_bin_ex_%d_%d.csv", iter, group));
g.electro_fy.write_csv(stringf("out_bin_ey_%d_%d.csv", iter, group));
}
}
void compute_bounds(PlacerNet &net, Axis axis, bool ref)
{
NetInfo *ni = net.ni;
auto drv_loc = cell_loc(ni->driver.cell, ref);
// seed with driver location
net.b1.at(axis) = net.b0.at(axis) = drv_loc.at(axis);
// iterate over users
for (auto &usr : ni->users) {
auto usr_loc = cell_loc(usr.cell, ref);
net.b1.at(axis) = std::max(net.b1.at(axis), usr_loc.at(axis));
net.b0.at(axis) = std::min(net.b0.at(axis), usr_loc.at(axis));
}
}
RealPair wl_coeff{0.5f, 0.5f};
void update_nets(bool ref)
{
static constexpr float min_wirelen_force = -3000.f;
pool.run(2 * nets.size(), [&](int i) {
auto &net = nets.at(i / 2);
auto axis = (i % 2) ? Axis::Y : Axis::X;
if (net.skip)
return;
net.min_exp.at(axis) = 0;
net.x_min_exp.at(axis) = 0;
net.max_exp.at(axis) = 0;
net.x_max_exp.at(axis) = 0;
// update bounding box
compute_bounds(net, axis, ref);
// compute rough center to subtract from exponents to avoid FP issues (from replace)
float c = (net.b1.at(axis) + net.b0.at(axis)) / 2.f;
for (auto &port : net.ports) {
if (!port.ref.cell)
continue;
RealPair loc = cell_loc(port.ref.cell, ref);
// update weighted-average model exponents
float emin = (c - loc.at(axis)) * wl_coeff.at(axis);
float emax = (loc.at(axis) - c) * wl_coeff.at(axis);
if (emin > min_wirelen_force) {
port.min_exp.at(axis) = std::exp(emin);
net.min_exp.at(axis) += port.min_exp.at(axis);
net.x_min_exp.at(axis) += loc.at(axis) * port.min_exp.at(axis);
} else {
port.min_exp.at(axis) = PlacerPort::invalid;
}
if (emax > min_wirelen_force) {
port.max_exp.at(axis) = std::exp(emax);
net.max_exp.at(axis) += port.max_exp.at(axis);
net.x_max_exp.at(axis) += loc.at(axis) * port.max_exp.at(axis);
} else {
port.max_exp.at(axis) = PlacerPort::invalid;
}
}
net.wa_wl.at(axis) =
(net.x_max_exp.at(axis) / net.max_exp.at(axis)) - (net.x_min_exp.at(axis) / net.min_exp.at(axis));
});
}
std::vector<std::pair<CellInfo *, RealPair>> gathered_wirelen_grad;
float wirelen_grad(CellInfo *cell, Axis axis, bool ref)
{
float gradient = 0;
if (cell->udata == -1)
return 0;
RealPair loc = cell_loc(cell, ref);
for (auto &port : cell->ports) {
NetInfo *ni = port.second.net;
if (!ni)
continue;
auto &nd = nets.at(ni->udata);
if (nd.skip)
continue;
auto &pd = nd.ports.at(port.second.type == PORT_OUT ? (nd.ports.size() - 1) : port.second.user_idx.idx());
// From Replace
// TODO: check these derivatives on paper
double d_min = 0, d_max = 0;
if (pd.has_min_exp(axis)) {
double min_sum = nd.min_exp.at(axis), x_min_sum = nd.x_min_exp.at(axis);
d_min = (min_sum * (pd.min_exp.at(axis) * (1.0f - wl_coeff.at(axis) * loc.at(axis))) +
wl_coeff.at(axis) * pd.min_exp.at(axis) * x_min_sum) /
(min_sum * min_sum);
}
if (pd.has_max_exp(axis)) {
double max_sum = nd.max_exp.at(axis), x_max_sum = nd.x_max_exp.at(axis);
d_max = (max_sum * (pd.max_exp.at(axis) * (1.0f + wl_coeff.at(axis) * loc.at(axis))) -
wl_coeff.at(axis) * pd.max_exp.at(axis) * x_max_sum) /
(max_sum * max_sum);
}
float crit = 0.0;
if (cfg.timing_driven) {
if (port.second.type == PORT_IN) {
crit = tmg.get_criticality(CellPortKey(cell->name, port.first));
} else if (port.second.type == PORT_OUT) {
if (ni && ni->users.entries() < 5) {
for (auto usr : ni->users)
crit = std::max(crit, tmg.get_criticality(CellPortKey(usr)));
}
}
}
float weight = 1.0 + 5 * std::pow(crit, 2);
gradient += weight * (d_min - d_max);
}
return gradient;
}
std::vector<float> dens_penalty;
float nesterov_a = 1.0f;
void update_gradients(bool ref = true, bool set_prev = true, bool init_penalty = false)
{
// TODO: skip non-group cells more efficiently?
pool.run(groups.size(), [&](int group) {
compute_density(group, ref);
run_fft(group);
});
update_nets(ref);
// First loop: back up gradients if required; set to zero; and compute density gradient
for (auto &cell : mcells) {
auto &g = groups.at(cell.group);
if (set_prev && ref) {
cell.last_wl_grad = cell.ref_wl_grad;
cell.last_dens_grad = cell.ref_dens_grad;
cell.last_total_grad = cell.ref_total_grad;
}
// wirelength gradient updated based on cell instances in next loop
(ref ? cell.ref_wl_grad : cell.wl_grad) = RealPair(0, 0);
// density grad based on bins - do we need to interpolate?
auto &grad = (ref ? cell.ref_dens_grad : cell.dens_grad);
grad = RealPair(0, 0);
iter_slithers((ref ? cell.ref_pos : cell.pos), cell.rect, [&](int x, int y, float area) {
grad += RealPair(g.electro_fx.at(x, y) * area, g.electro_fy.at(x, y) * area);
});
// total gradient computed at the end
(ref ? cell.ref_total_grad : cell.total_grad) = RealPair(0, 0);
}
if (gathered_wirelen_grad.empty()) {
for (auto &cell : ctx->cells) {
CellInfo *ci = cell.second.get();
if (ci->udata == -1)
continue;
gathered_wirelen_grad.emplace_back(ci, RealPair());
}
}
// Compute wirelength gradients for cells in parallel, this is a slow part
pool.run(gathered_wirelen_grad.size(), [&](int i) {
auto &entry = gathered_wirelen_grad.at(i);
CellInfo *ci = entry.first;
float wl_gx = wirelen_grad(ci, Axis::X, ref);
float wl_gy = wirelen_grad(ci, Axis::Y, ref);
entry.second = RealPair(wl_gx, wl_gy);
});
// Second loop: sum up wirelength gradients across concrete cell instances
for (auto entry : gathered_wirelen_grad) {
auto &mc = mcells.at(entry.first->udata);
(ref ? mc.ref_wl_grad : mc.wl_grad) += entry.second;
}
if (init_penalty) {
// set initial density penalty
double wirelen_sum = 0, force_sum = 0;
for (int i = 0; i < int(ccells.size()); i++) {
auto mc = mcells.at(i);
wirelen_sum += std::abs(mc.ref_wl_grad.x) + std::abs(mc.ref_wl_grad.y);
force_sum += std::abs(mc.ref_dens_grad.x) + std::abs(mc.ref_dens_grad.y);
}
const float eta = 1e-1;
float init_dens_penalty = eta * (wirelen_sum / force_sum);
log_info("initial density penalty: %f\n", init_dens_penalty);
dens_penalty.resize(groups.size(), init_dens_penalty);
update_potentials(true); // set initial potential
}
// Third loop: compute total gradient, and precondition
// TODO: ALM as well as simple penalty
for (auto &cell : mcells) {
#if 0
if (!cell.is_spacer) {
printf("%d (%f, %f) wirelen_grad: (%f,%f) density_grad: (%f,%f)\n", iter, cell.ref_pos.x,
cell.ref_pos.y, cell.ref_wl_grad.x, cell.ref_wl_grad.y, cell.ref_dens_grad.x,
cell.ref_dens_grad.y);
}
#endif
// Preconditioner from replace for now
float precond = std::max(1.0f, float(cell.pin_count) + dens_penalty[cell.group] * cell.rect.area());
if (ref) {
cell.ref_total_grad =
((cell.ref_wl_grad * -1) - cell.ref_dens_grad * dens_penalty[cell.group]) / precond;
} else {
cell.total_grad = ((cell.wl_grad * -1) - cell.dens_grad * dens_penalty[cell.group]) / precond;
}
}
}
float steplen = 0.01;
float get_steplen()
{
float coord_dist = 0;
float grad_dist = 0;
int n = 0;
for (auto &cell : mcells) {
if (cell.is_fixed || cell.is_dark)
continue;
coord_dist += (cell.ref_pos.x - cell.last_ref_pos.x) * (cell.ref_pos.x - cell.last_ref_pos.x);
coord_dist += (cell.ref_pos.y - cell.last_ref_pos.y) * (cell.ref_pos.y - cell.last_ref_pos.y);
grad_dist +=
(cell.ref_total_grad.x - cell.last_total_grad.x) * (cell.ref_total_grad.x - cell.last_total_grad.x);
grad_dist +=
(cell.ref_total_grad.y - cell.last_total_grad.y) * (cell.ref_total_grad.y - cell.last_total_grad.y);
n++;
}
coord_dist = std::sqrt(coord_dist / (2 * float(n)));
grad_dist = std::sqrt(grad_dist / (2 * float(n)));
log_info("coord_dist: %f grad_dist: %f\n", coord_dist, grad_dist);
return coord_dist / grad_dist;
// return 0.1;
}
float system_hpwl()
{
float hpwl = 0;
for (auto &net : nets) {
if (net.skip)
continue;
// update bounding box
compute_bounds(net, Axis::X, false);
compute_bounds(net, Axis::Y, false);
hpwl += net.b1.x - net.b0.x;
hpwl += net.b1.y - net.b0.y;
}
return hpwl;
}
void update_potentials(bool init = false)
{
for (auto &group : groups)
group.curr_potential = 0;
for (auto &cell : mcells) {
auto &g = groups.at(cell.group);
iter_slithers(cell.ref_pos, cell.rect,
[&](int x, int y, float area) { g.curr_potential += g.electro_phi.at(x, y) * area; });
}
if (init) {
for (auto &group : groups)
group.init_potential = group.curr_potential;
}
}
float system_potential()
{
float pot = 0;
for (auto &group : groups)
pot += group.curr_potential;
return pot;
}
float penalty_beta = 2.0e3f;
float alpha_l = 1.05f, alpha_h = 1.06f;
double penalty_incr = alpha_h - 1;
void update_penalties()
{
float pot_norm = 0;
// compute L2-norm of relative system potential
std::vector<float> rel_pot;
for (int g = 0; g < int(groups.size()); g++) {
auto &group = groups.at(g);
if (!group.enabled)
continue;
float phi_hat = group.curr_potential / group.init_potential;
rel_pot.push_back(phi_hat);
pot_norm += phi_hat * phi_hat;
}
pot_norm = sqrt(pot_norm);
log_info("pot_norm: %f\n", pot_norm);
// update penalty multiplier (ELFPlace equation 22)
double log_term = std::log(penalty_beta * pot_norm + 1);
penalty_incr = penalty_incr * ((log_term / (log_term + 1)) * (alpha_h - alpha_l) + alpha_l);
// update density penalties (ELFPlace equation 21)
for (int g = 0; g < int(groups.size()); g++) {
if (!groups.at(g).enabled)
continue;
float next_penalty = dens_penalty.at(g) + (penalty_incr * (rel_pot.at(g) / pot_norm));
dens_penalty.at(g) = next_penalty;
}
}
void initialise()
{
float initial_steplength = 0.01f;
// Update current and previous gradients with initial solution
for (auto &cell : mcells) {
cell.ref_pos = cell.pos;
}
while (true) {
update_gradients(true, true, /* init_penalty */ true);
// compute a "fake" previous position based on an arbitrary steplength and said gradients for nesterov
for (auto &cell : mcells) {
if (cell.is_fixed || cell.is_dark)
continue;
// save current position in last_pos
cell.last_pos = cell.pos;
cell.last_ref_pos = cell.ref_pos;
// compute previous position; but store it in current for gradient computation
cell.ref_pos = cell.pos - cell.ref_total_grad * initial_steplength;
}
// Compute the previous gradients (albeit into the current state fields)
update_gradients(true);
// Now we have the fake previous state in the current state
for (auto &cell : mcells) {
if (cell.is_fixed || cell.is_dark)
continue;
std::swap(cell.last_ref_pos, cell.ref_pos);
std::swap(cell.ref_total_grad, cell.last_total_grad);
std::swap(cell.ref_wl_grad, cell.last_wl_grad);
std::swap(cell.ref_dens_grad, cell.last_dens_grad);
}
float next_steplen = get_steplen();
log_info("initial steplen=%f next steplen = %f\n", initial_steplength, next_steplen);
if (next_steplen != 0 && std::isfinite(next_steplen) && std::abs(next_steplen) < 1e10) {
break;
} else {
initial_steplength *= 10;
}
}
update_timing();
}
RealPair clamp_loc(RealPair loc)
{
return RealPair(std::max<float>(0, std::min<float>(width - 1, loc.x)),
std::max<float>(0, std::min<float>(height - 1, loc.y)));
}
void update_chains()
{
// Move the post-solve position of a chain towards be the weighted average of its constituents
// The strength increases with iterations
float alpha = std::min<float>(std::pow(1.002f, iter) - 1, 1.0f);
float dist = 0;
for (int i = 0; i < int(macros.size()); i++) {
auto &macro = macros.at(i);
float total_area = 0;
const float area_epsilon = 0.05;
RealPair pos(0, 0), ref_pos(0, 0);
for (int c : macro.conc_cells) {
auto &mc = mcells.at(c);
float a = std::max(mc.rect.area(), area_epsilon);
pos += mc.pos * a;
ref_pos += mc.ref_pos * a;
total_area += a;
}
pos /= total_area;
ref_pos /= total_area;
for (int c : macro.conc_cells) {
auto &cc = ccells.at(c);
auto &mc = mcells.at(c);
auto last_pos = mc.pos;
mc.pos = mc.pos * (1 - alpha) + (pos + RealPair(cc.chunk_dx, cc.chunk_dy)) * alpha;
mc.ref_pos = mc.ref_pos * (1 - alpha) + (ref_pos + RealPair(cc.chunk_dx, cc.chunk_dy)) * alpha;
dist += std::sqrt(std::pow(last_pos.x - mc.pos.x, 2) + std::pow(last_pos.y - mc.pos.y, 2));
}
}
log_info(" update_chains distance %.2f\n", dist);
}
void step()
{
// TODO: update penalties; wirelength factor; etc
steplen = get_steplen();
std::string penalty_str = "";
for (auto p : dens_penalty) {
penalty_str += stringf("%s%.2f", penalty_str.empty() ? "" : ", ", p);
}
log_info("iter=%d steplen=%f a=%f penalty=[%s]\n", iter, steplen, nesterov_a, penalty_str.c_str());
float a_next = (1.0f + std::sqrt(4.0f * nesterov_a * nesterov_a + 1)) / 2.0f;
// Update positions using Nesterov's
for (auto &cell : mcells) {
if (cell.is_fixed || cell.is_dark)
continue;
// save current position in last_pos
cell.last_ref_pos = cell.ref_pos;
cell.last_pos = cell.pos;
// compute new position
cell.pos = clamp_loc(cell.ref_pos - cell.ref_total_grad * steplen);
// compute reference position
cell.ref_pos = clamp_loc(cell.pos + (cell.pos - cell.last_pos) * ((nesterov_a - 1) / a_next));
}
nesterov_a = a_next;
update_chains();
update_gradients(true);
update_potentials();
log_info(" system potential: %f hpwl: %f\n", system_potential(), system_hpwl());
compute_overlap();
if ((iter % 10) == 0)
update_timing();
}
void update_timing()
{
if (!cfg.timing_driven)
return;
for (auto &net : nets) {
NetInfo *ni = net.ni;
if (ni->driver.cell == nullptr)
continue;
RealPair drv_loc = cell_loc(ni->driver.cell, false);
for (auto usr : ni->users.enumerate()) {
RealPair usr_loc = cell_loc(usr.value.cell, false);
delay_t est_delay = cfg.timing_c + cfg.timing_mx * std::abs(drv_loc.x - usr_loc.x) +
cfg.timing_my * std::abs(drv_loc.y - usr_loc.y);
tmg.set_route_delay(CellPortKey(usr.value), DelayPair(est_delay));
}
}
tmg.run(false);
}
void legalise_step(bool dsp_bram)
{
// assume DSP and BRAM are all groups 2+ for now
for (int i = 0; i < int(ccells.size()); i++) {
auto &mc = mcells.at(i);
auto &cc = ccells.at(i);
if (dsp_bram && mc.group < 2)
continue;
if (!dsp_bram && mc.group >= 2)
continue;
if (cc.macro_idx != -1 && i != macros.at(cc.macro_idx).root->udata)
continue; // not macro root
if (mc.is_fixed) { // already placed
NPNR_ASSERT(cc.base_cell->bel != BelId());
continue;
}
enqueue_legalise(i);
}
// in the DSP/BRAM step, also merge any cells outside of a group (global buffers, other random crud/singletons)
for (auto &cell : ctx->cells) {
if (cell.second->udata == -1)
enqueue_legalise(cell.second.get());
}
log_info("Strict legalising %d cells...\n", int(to_legalise.size()));
float pre_hpwl = system_hpwl();
legalise_placement_strict(true);
update_nets(true);
float post_hpwl = system_hpwl();
log_info("HPWL after legalise: %f (delta: %f)\n", post_hpwl, post_hpwl - pre_hpwl);
}
void enqueue_legalise(int cell_idx)
{
NPNR_ASSERT(cell_idx < int(ccells.size())); // we should never be legalising spacers or dark nodes
auto &ccell = ccells.at(cell_idx);
if (ccell.macro_idx != -1) {
// is a macro
auto &macro = macros.at(ccell.macro_idx);
to_legalise.emplace(int(macro.cells.size()), macro.root->name);
} else {
to_legalise.emplace(1, ccell.base_cell->name);
}
}
void enqueue_legalise(CellInfo *ci)
{
if (ci->udata != -1) {
// managed by static
enqueue_legalise(ci->udata);
} else {
// special case
to_legalise.emplace(1, ci->name);
}
}
// Strict placement legalisation, performed after the initial HeAP spreading
void legalise_placement_strict(bool require_validity = true)
{
// At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using
// the simple greedy largest-macro-first approach.
int ripup_radius = 2;
int total_iters = 0;
int total_iters_noreset = 0;
while (!to_legalise.empty()) {
auto top = to_legalise.top();
to_legalise.pop();
CellInfo *ci = ctx->cells.at(top.second).get();
// Was now placed, ignore
if (ci->bel != BelId())
continue;
// log_info(" Legalising %s (%s) %d\n", top.second.c_str(ctx), ci->type.c_str(ctx), top.first);
FastBels::FastBelsData *fb;
fast_bels.getBelsForCellType(ci->type, &fb);
int radius = 0;
int iter = 0;
int iter_at_radius = 0;
bool placed = false;
BelId bestBel;
int best_inp_len = std::numeric_limits<int>::max();
total_iters++;
total_iters_noreset++;
if (total_iters > int(ccells.size())) {
total_iters = 0;
ripup_radius = std::min(std::max(width + 1, height + 1), ripup_radius * 2);
}
if (total_iters_noreset > std::max(5000, 8 * int(ctx->cells.size()))) {
log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n");
}
while (!placed) {
// Determine a search radius around the solver location (which increases over time) that is clamped to
// the region constraint for the cell (if applicable)
int rx = radius, ry = radius;
// Pick a random X and Y location within our search radius
int cx, cy;
if (ci->udata == -1) {
cx = width / 2;
cy = height / 2;
} else {
cx = int(mcells.at(ci->udata).pos.x);
cy = int(mcells.at(ci->udata).pos.y);
}
int nx = ctx->rng(2 * rx + 1) + std::max(cx - rx, 0);
int ny = ctx->rng(2 * ry + 1) + std::max(cy - ry, 0);
iter++;
iter_at_radius++;
if (iter >= (10 * (radius + 1))) {
// No luck yet, increase radius
radius = std::min(std::max(width + 1, height + 1), radius + 1);
while (radius < std::max(width + 1, height + 1)) {
// Keep increasing the radius until it will actually increase the number of cells we are
// checking (e.g. BRAM and DSP will not be in all cols/rows), so we don't waste effort
for (int x = std::max(0, cx - radius); x <= std::min(width + 1, cx + radius); x++) {
if (x >= int(fb->size()))
break;
for (int y = std::max(0, cy - radius); y <= std::min(height + 1, cy + radius); y++) {
if (y >= int(fb->at(x).size()))
break;
if (fb->at(x).at(y).size() > 0)
goto notempty;
}
}
radius = std::min(std::max(width + 1, height + 1), radius + 1);
}
notempty:
iter_at_radius = 0;
iter = 0;
}
// If our randomly chosen cooridnate is out of bounds; or points to a tile with no relevant bels; ignore
// it
if (nx < 0 || nx > width + 1)
continue;
if (ny < 0 || ny > height + 1)
continue;
if (nx >= int(fb->size()))
continue;
if (ny >= int(fb->at(nx).size()))
continue;
if (fb->at(nx).at(ny).empty())
continue;
// The number of attempts to find a location to try
int need_to_explore = 2 * radius;
// If we have found at least one legal location; and made enough attempts; assume it's good enough and
// finish
if (iter_at_radius >= need_to_explore && bestBel != BelId()) {
CellInfo *bound = ctx->getBoundBelCell(bestBel);
if (bound != nullptr) {
ctx->unbindBel(bound->bel);
enqueue_legalise(bound);
}
ctx->bindBel(bestBel, ci, STRENGTH_WEAK);
placed = true;
Loc loc = ctx->getBelLocation(bestBel);
if (ci->udata != -1) {
auto &mc = mcells.at(ci->udata);
mc.pos = mc.ref_pos = RealPair(loc, 0.5);
mc.is_fixed = true;
}
break;
}
if (ci->cluster == ClusterId()) {
// The case where we have no relative constraints
for (auto sz : fb->at(nx).at(ny)) {
// Look through all bels in this tile; checking region constraint if applicable
if (!ci->testRegion(sz))
continue;
// Prefer available bels; unless we are dealing with a wide radius (e.g. difficult control sets)
// or occasionally trigger a tiebreaker
if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) {
CellInfo *bound = ctx->getBoundBelCell(sz);
if (bound != nullptr) {
// Only rip up cells without constraints
if (bound->cluster != ClusterId())
continue;
ctx->unbindBel(bound->bel);
}
// Provisionally bind the bel
ctx->bindBel(sz, ci, STRENGTH_WEAK);
if (require_validity && !ctx->isBelLocationValid(sz)) {
// New location is not legal; unbind the cell (and rebind the cell we ripped up if
// applicable)
ctx->unbindBel(sz);
if (bound != nullptr)
ctx->bindBel(sz, bound, STRENGTH_WEAK);
} else if (iter_at_radius < need_to_explore) {
// It's legal, but we haven't tried enough locations yet
ctx->unbindBel(sz);
if (bound != nullptr)
ctx->bindBel(sz, bound, STRENGTH_WEAK);
int input_len = 0;
// Compute a fast input wirelength metric at this bel; and save if better than our last
// try
for (auto &port : ci->ports) {
auto &p = port.second;
if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr)
continue;
CellInfo *drv = p.net->driver.cell;
if (drv->udata == -1)
continue;
auto drv_loc = mcells.at(drv->udata);
input_len += std::abs(int(drv_loc.pos.x) - nx) + std::abs(int(drv_loc.pos.y) - ny);
}
if (input_len < best_inp_len) {
best_inp_len = input_len;
bestBel = sz;
}
break;
} else {
// It's legal, and we've tried enough. Finish.
if (bound != nullptr)
enqueue_legalise(bound);
Loc loc = ctx->getBelLocation(sz);
if (ci->udata != -1) {
auto &mc = mcells.at(ci->udata);
mc.pos = mc.ref_pos = RealPair(loc, 0.5);
mc.is_fixed = true;
}
placed = true;
break;
}
}
}
} else {
// We do have relative constraints
for (auto sz : fb->at(nx).at(ny)) {
// List of cells and their destination
std::vector<std::pair<CellInfo *, BelId>> targets;
// List of bels we placed things at; and the cell that was there before if applicable
std::vector<std::pair<BelId, CellInfo *>> swaps_made;
if (!ctx->getClusterPlacement(ci->cluster, sz, targets))
continue;
for (auto &target : targets) {
// Check it satisfies the region constraint if applicable
if (!target.first->testRegion(target.second))
goto fail;
CellInfo *bound = ctx->getBoundBelCell(target.second);
// Chains cannot overlap; so if we have to ripup a cell make sure it isn't part of a chain
if (bound != nullptr)
if (bound->cluster != ClusterId() || bound->belStrength > STRENGTH_WEAK)
goto fail;
}
// Actually perform the move; keeping track of the moves we make so we can revert them if needed
for (auto &target : targets) {
CellInfo *bound = ctx->getBoundBelCell(target.second);
if (bound != nullptr)
ctx->unbindBel(target.second);
ctx->bindBel(target.second, target.first, STRENGTH_STRONG);
swaps_made.emplace_back(target.second, bound);
}
// Check that the move we have made is legal
for (auto &sm : swaps_made) {
if (!ctx->isBelLocationValid(sm.first))
goto fail;
}
if (false) {
fail:
// If the move turned out to be illegal; revert all the moves we made
for (auto &swap : swaps_made) {
ctx->unbindBel(swap.first);
if (swap.second != nullptr)
ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK);
}
continue;
}
for (auto &target : targets) {
Loc loc = ctx->getBelLocation(target.second);
if (ci->udata != -1) {
auto &mc = mcells.at(target.first->udata);
mc.pos = mc.ref_pos = RealPair(loc, 0.5);
mc.is_fixed = true;
}
// log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z);
}
for (auto &swap : swaps_made) {
// Where we have ripped up cells; add them to the queue
if (swap.second != nullptr)
enqueue_legalise(swap.second);
}
placed = true;
break;
}
}
}
}
}
public:
StaticPlacer(Context *ctx, PlacerStaticCfg cfg)
: ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx), pool(ctx->setting<int>("threads", 8))
{
groups.resize(cfg.cell_groups.size());
tmg.setup_only = true;
tmg.setup();
dump_density = ctx->setting<bool>("static/dump_density", false);
};
void place()
{
log_info("Running Static placer...\n");
init_bels();
prepare_cells();
init_cells();
init_nets();
insert_dark();
insert_spacer();
prepare_density_bins();
initialise();
bool legalised_ip = false;
while (true) {
step();
for (auto &p : dens_penalty)
if (p < 50.0)
p *= 1.025;
else
p += 1.0;
// wl_coeff.at(Axis::X) = std::max(0.005, 0.995 * wl_coeff.at(Axis::X));
// wl_coeff.at(Axis::Y) = std::max(0.005, 0.995 * wl_coeff.at(Axis::Y));
// update_penalties();
if (!legalised_ip) {
float ip_overlap = 0;
for (int i = cfg.logic_groups; i < int(groups.size()); i++)
ip_overlap = std::max(ip_overlap, groups.at(i).overlap);
if (ip_overlap < 0.15) {
legalise_step(true);
legalised_ip = true;
for (int i = cfg.logic_groups; i < int(groups.size()); i++)
groups.at(i).enabled = false;
}
} else {
float logic_overlap = 0;
for (int i = 0; i < cfg.logic_groups; i++)
logic_overlap = std::max(logic_overlap, groups.at(i).overlap);
if (logic_overlap < 0.1) {
legalise_step(false);
break;
}
}
++iter;
}
{
auto placer1_cfg = Placer1Cfg(ctx);
placer1_cfg.hpwl_scale_x = cfg.hpwl_scale_x;
placer1_cfg.hpwl_scale_y = cfg.hpwl_scale_y;
placer1_refine(ctx, placer1_cfg);
}
}
};
}; // namespace
bool placer_static(Context *ctx, PlacerStaticCfg cfg)
{
StaticPlacer(ctx, cfg).place();
return true;
}
PlacerStaticCfg::PlacerStaticCfg(Context *ctx)
{
timing_driven = ctx->setting<bool>("timing_driven");
hpwl_scale_x = 1;
hpwl_scale_y = 1;
}
NEXTPNR_NAMESPACE_END