1541 lines
60 KiB
C++
1541 lines
60 KiB
C++
/*
|
|
* nextpnr -- Next Generation Place and Route
|
|
*
|
|
* Copyright (C) 2022 gatecat <gatecat@ds0.me>
|
|
*
|
|
* Permission to use, copy, modify, and/or distribute this software for any
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
* copyright notice and this permission notice appear in all copies.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#include "placer_static.h"
|
|
#include "static_util.h"
|
|
|
|
#include <boost/optional.hpp>
|
|
#include <chrono>
|
|
#include <deque>
|
|
#include <fstream>
|
|
#include <numeric>
|
|
#include <queue>
|
|
#include <tuple>
|
|
#include "array2d.h"
|
|
#include "fast_bels.h"
|
|
#include "log.h"
|
|
#include "nextpnr.h"
|
|
#include "parallel_refine.h"
|
|
#include "place_common.h"
|
|
#include "placer1.h"
|
|
#include "scope_lock.h"
|
|
#include "timing.h"
|
|
#include "util.h"
|
|
|
|
#include "fftsg.h"
|
|
|
|
#ifndef NPNR_DISABLE_THREADS
|
|
#include <atomic>
|
|
#include <condition_variable>
|
|
#include <mutex>
|
|
#include <thread>
|
|
|
|
#endif
|
|
|
|
NEXTPNR_NAMESPACE_BEGIN
|
|
|
|
using namespace StaticUtil;
|
|
|
|
namespace {
|
|
|
|
struct PlacerGroup
|
|
{
|
|
bool enabled = true;
|
|
int total_bels = 0;
|
|
double concrete_area = 0;
|
|
double dark_area = 0;
|
|
double total_area = 0;
|
|
array2d<float> loc_area;
|
|
|
|
float overlap = 0;
|
|
|
|
array2d<double> conc_density; // excludes fillers and dark nodes
|
|
array2d<double> density;
|
|
// FFT related data (TODO: should these be per group?)
|
|
FFTArray density_fft;
|
|
FFTArray electro_phi;
|
|
FFTArray electro_fx, electro_fy;
|
|
|
|
double init_potential = 0;
|
|
double curr_potential = 0;
|
|
};
|
|
|
|
// Could be an actual concrete netlist cell; or just a spacer
|
|
struct MoveCell
|
|
{
|
|
StaticRect rect;
|
|
// TODO: multiple contiguous vectors is probably faster than an array of structs, but also messier
|
|
RealPair pos, ref_pos, last_pos, last_ref_pos;
|
|
RealPair ref_wl_grad, wl_grad, last_wl_grad;
|
|
RealPair ref_dens_grad, dens_grad, last_dens_grad;
|
|
RealPair ref_total_grad, total_grad, last_total_grad;
|
|
int32_t pin_count;
|
|
int16_t group;
|
|
int16_t bx, by; // bins
|
|
bool is_fixed : 1;
|
|
bool is_spacer : 1;
|
|
bool is_dark : 1;
|
|
};
|
|
|
|
// Extra data for cells that aren't spacers
|
|
struct ConcreteCell
|
|
{
|
|
CellInfo *base_cell;
|
|
// When cells are macros; we split them up into chunks
|
|
// based on dx/dy location
|
|
int32_t macro_idx = -1;
|
|
int16_t chunk_dx = 0, chunk_dy = 0;
|
|
};
|
|
|
|
struct ClusterGroupKey
|
|
{
|
|
ClusterGroupKey(int dx = 0, int dy = 0, int group = -1) : dx(dx), dy(dy), group(group) {};
|
|
bool operator==(const ClusterGroupKey &other) const
|
|
{
|
|
return dx == other.dx && dy == other.dy && group == other.group;
|
|
}
|
|
unsigned hash() const { return mkhash(mkhash(dx, dy), group); }
|
|
int16_t dx, dy, group;
|
|
};
|
|
|
|
struct PlacerMacro
|
|
{
|
|
CellInfo *root;
|
|
std::vector<int32_t> conc_cells;
|
|
dict<ClusterGroupKey, std::vector<CellInfo *>> cells;
|
|
};
|
|
|
|
struct PlacerBin
|
|
{
|
|
float density;
|
|
// ...
|
|
};
|
|
|
|
struct PlacerPort
|
|
{
|
|
// for wirelength data
|
|
static constexpr float invalid = std::numeric_limits<float>::lowest();
|
|
|
|
PortRef ref;
|
|
RealPair max_exp{invalid, invalid};
|
|
RealPair min_exp{invalid, invalid};
|
|
bool has_max_exp(Axis axis) const { return max_exp.at(axis) != invalid; }
|
|
bool has_min_exp(Axis axis) const { return min_exp.at(axis) != invalid; }
|
|
};
|
|
|
|
struct PlacerNet
|
|
{
|
|
NetInfo *ni;
|
|
bool skip = false;
|
|
RealPair b1, b0; // real bounding box
|
|
RealPair min_exp, x_min_exp;
|
|
RealPair max_exp, x_max_exp;
|
|
RealPair wa_wl;
|
|
// lines up with user indexes; plus one for driver
|
|
std::vector<PlacerPort> ports;
|
|
int hpwl() { return (b1.x - b0.x) + (b1.y - b0.y); }
|
|
};
|
|
|
|
#ifdef NPNR_DISABLE_THREADS
|
|
struct ThreadPool
|
|
{
|
|
ThreadPool(int) {};
|
|
|
|
void run(int N, std::function<void(int)> func)
|
|
{
|
|
for (int i = 0; i < N; i++)
|
|
func(i);
|
|
};
|
|
};
|
|
#else
|
|
struct ThreadPool
|
|
{
|
|
ThreadPool(int thread_count)
|
|
{
|
|
done.resize(thread_count, false);
|
|
for (int i = 0; i < thread_count; i++) {
|
|
threads.emplace_back([this, i]() { this->worker(i); });
|
|
}
|
|
}
|
|
std::vector<std::thread> threads;
|
|
std::condition_variable cv_start, cv_done;
|
|
std::mutex mutex;
|
|
|
|
bool work_available = false;
|
|
bool shutdown = false;
|
|
std::vector<bool> done;
|
|
std::function<void(int)> work;
|
|
int work_count;
|
|
|
|
~ThreadPool()
|
|
{
|
|
{
|
|
std::lock_guard lk(mutex);
|
|
shutdown = true;
|
|
}
|
|
cv_start.notify_all();
|
|
for (auto &t : threads)
|
|
t.join();
|
|
}
|
|
|
|
void run(int N, std::function<void(int)> func)
|
|
{
|
|
{
|
|
std::lock_guard lk(mutex);
|
|
work = func;
|
|
work_count = N;
|
|
work_available = true;
|
|
std::fill(done.begin(), done.end(), false);
|
|
}
|
|
cv_start.notify_all();
|
|
{
|
|
std::unique_lock lk(mutex);
|
|
cv_done.wait(lk, [this] { return std::all_of(done.begin(), done.end(), [](bool x) { return x; }); });
|
|
work_available = false;
|
|
}
|
|
}
|
|
|
|
void worker(int idx)
|
|
{
|
|
while (true) {
|
|
std::unique_lock lk(mutex);
|
|
cv_start.wait(lk, [this, idx] { return (work_available && !done.at(idx)) || shutdown; });
|
|
if (shutdown) {
|
|
lk.unlock();
|
|
break;
|
|
} else if (work_available && !done.at(idx)) {
|
|
int work_per_thread = (work_count + int(threads.size()) - 1) / threads.size();
|
|
int begin = work_per_thread * idx;
|
|
int end = std::min(work_count, work_per_thread * (idx + 1));
|
|
lk.unlock();
|
|
|
|
for (int j = begin; j < end; j++) {
|
|
work(j);
|
|
}
|
|
|
|
lk.lock();
|
|
done.at(idx) = true;
|
|
lk.unlock();
|
|
cv_done.notify_one();
|
|
}
|
|
}
|
|
}
|
|
};
|
|
#endif
|
|
|
|
class StaticPlacer
|
|
{
|
|
Context *ctx;
|
|
PlacerStaticCfg cfg;
|
|
|
|
std::vector<MoveCell> mcells;
|
|
std::vector<ConcreteCell> ccells;
|
|
std::vector<PlacerMacro> macros;
|
|
std::vector<PlacerGroup> groups;
|
|
std::vector<PlacerNet> nets;
|
|
idict<ClusterId> cluster2idx;
|
|
|
|
FastBels fast_bels;
|
|
TimingAnalyser tmg;
|
|
ThreadPool pool;
|
|
|
|
int width, height;
|
|
int iter = 0;
|
|
bool fft_debug = false;
|
|
bool dump_density = false;
|
|
|
|
// legalisation queue
|
|
std::priority_queue<std::pair<int, IdString>> to_legalise;
|
|
|
|
void prepare_cells()
|
|
{
|
|
for (auto &cell : ctx->cells) {
|
|
CellInfo *ci = cell.second.get();
|
|
ci->udata = -1;
|
|
// process legacy-ish bel attributes
|
|
if (ci->attrs.count(ctx->id("BEL")) && ci->bel == BelId()) {
|
|
std::string loc_name = ci->attrs.at(ctx->id("BEL")).as_string();
|
|
BelId bel = ctx->getBelByNameStr(loc_name);
|
|
NPNR_ASSERT(ctx->isValidBelForCellType(ci->type, bel));
|
|
NPNR_ASSERT(ctx->checkBelAvail(bel));
|
|
ctx->bindBel(bel, ci, STRENGTH_USER);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool lookup_group(IdString type, int &group, StaticRect &rect)
|
|
{
|
|
for (size_t i = 0; i < cfg.cell_groups.size(); i++) {
|
|
const auto &g = cfg.cell_groups.at(i);
|
|
if (g.cell_area.count(type)) {
|
|
group = i;
|
|
rect = g.cell_area.at(type);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void init_bels()
|
|
{
|
|
log_info("⌁ initialising bels...\n");
|
|
width = 0;
|
|
height = 0;
|
|
for (auto bel : ctx->getBels()) {
|
|
Loc loc = ctx->getBelLocation(bel);
|
|
width = std::max(width, loc.x + 1);
|
|
height = std::max(height, loc.y + 1);
|
|
}
|
|
dict<IdString, int> beltype2group;
|
|
for (int i = 0; i < int(groups.size()); i++) {
|
|
groups.at(i).loc_area.reset(width, height);
|
|
for (const auto &bel_type : cfg.cell_groups.at(i).bel_area)
|
|
beltype2group[bel_type.first] = i;
|
|
}
|
|
for (auto bel : ctx->getBels()) {
|
|
Loc loc = ctx->getBelLocation(bel);
|
|
IdString type = ctx->getBelType(bel);
|
|
auto fnd = beltype2group.find(type);
|
|
if (fnd == beltype2group.end())
|
|
continue;
|
|
auto size = cfg.cell_groups.at(fnd->second).bel_area.at(type); // TODO: do we care about dimensions too
|
|
auto &group = groups.at(fnd->second);
|
|
for (int dy = 0; dy <= int(size.h); dy++) {
|
|
for (int dx = 0; dx <= int(size.w); dx++) {
|
|
float h = (dy == int(size.h)) ? (size.h - int(size.h)) : 1;
|
|
float w = (dx == int(size.w)) ? (size.w - int(size.w)) : 1;
|
|
group.loc_area.at(loc.x + dx, loc.y + dy) += w * h;
|
|
}
|
|
}
|
|
group.total_area += size.area();
|
|
group.total_bels += 1;
|
|
}
|
|
}
|
|
|
|
void init_nets()
|
|
{
|
|
nets.reserve(ctx->nets.size());
|
|
for (auto &net : ctx->nets) {
|
|
NetInfo *ni = net.second.get();
|
|
ni->udata = nets.size();
|
|
nets.emplace_back();
|
|
|
|
auto &nd = nets.back();
|
|
nd.ni = ni;
|
|
nd.skip = (ni->driver.cell == nullptr); // (or global buffer?)
|
|
nd.ports.resize(ni->users.capacity() + 1); // +1 for the driver
|
|
nd.ports.back().ref = ni->driver;
|
|
for (auto usr : ni->users.enumerate()) {
|
|
nd.ports.at(usr.index.idx()).ref = usr.value;
|
|
}
|
|
}
|
|
}
|
|
|
|
int add_cell(StaticRect rect, int group, RealPair pos, CellInfo *ci = nullptr)
|
|
{
|
|
int idx = mcells.size();
|
|
mcells.emplace_back();
|
|
auto &m = mcells.back();
|
|
m.rect = rect;
|
|
m.group = group;
|
|
m.pos = pos;
|
|
if (ci) {
|
|
// Is a concrete cell (might be a macro, in which case ci is just one of them...)
|
|
// Can't add concrete cells once we have spacers (we define it such that indices line up between mcells and
|
|
// ccells; spacer cells only exist in mcells)
|
|
NPNR_ASSERT(idx == int(ccells.size()));
|
|
ccells.emplace_back();
|
|
auto &c = ccells.back();
|
|
c.base_cell = ci;
|
|
groups.at(group).concrete_area += rect.area();
|
|
} else {
|
|
// Is a spacer cell
|
|
m.is_spacer = true;
|
|
}
|
|
return idx;
|
|
}
|
|
|
|
const float pi = 3.141592653589793f;
|
|
|
|
RealPair rand_loc()
|
|
{
|
|
// Box-muller
|
|
float u1 = ctx->rngf(1.0f);
|
|
while (u1 < 1e-5)
|
|
u1 = ctx->rngf(1.0f);
|
|
float u2 = ctx->rngf(1.0f);
|
|
float m = std::sqrt(-2.f * std::log(u1));
|
|
float z0 = m * std::cos(2.f * pi * u2);
|
|
float z1 = m * std::sin(2.f * pi * u2);
|
|
float x = (width / 2.f) + (width / 250.f) * z0;
|
|
float y = (height / 2.f) + (height / 250.f) * z1;
|
|
x = std::min<float>(width - 1.f, std::max<float>(x, 0));
|
|
y = std::min<float>(height - 1.f, std::max<float>(y, 0));
|
|
return RealPair(x, y);
|
|
}
|
|
|
|
RealPair cell_loc(CellInfo *ci, bool ref)
|
|
{
|
|
if (ci->udata == -1) {
|
|
// not handled?
|
|
NPNR_ASSERT_MSG(ci->bel != BelId(),
|
|
stringf("Cell %s of type %s has no bel", ci->name.c_str(ctx), ci->type.c_str(ctx))
|
|
.c_str()); // already fixed
|
|
return RealPair(ctx->getBelLocation(ci->bel), 0.5f);
|
|
} else {
|
|
return ref ? mcells.at(ci->udata).ref_pos : mcells.at(ci->udata).pos;
|
|
}
|
|
}
|
|
|
|
void init_cells()
|
|
{
|
|
log_info("⌁ initialising cells...\n");
|
|
// Process non-clustered cells and find clusters
|
|
for (auto &cell : ctx->cells) {
|
|
CellInfo *ci = cell.second.get();
|
|
int cell_group;
|
|
StaticRect rect;
|
|
// Mismatched group case
|
|
if (!lookup_group(ci->type, cell_group, rect)) {
|
|
if (ci->bel == BelId()) {
|
|
for (auto bel : ctx->getBels()) {
|
|
if (ctx->isValidBelForCellType(ci->type, bel) && ctx->checkBelAvail(bel)) {
|
|
ctx->bindBel(bel, ci, STRENGTH_STRONG);
|
|
if (!ctx->isBelLocationValid(bel)) {
|
|
ctx->unbindBel(bel);
|
|
} else {
|
|
log_info(" placed potpourri cell '%s' at bel '%s'\n", ctx->nameOf(ci),
|
|
ctx->nameOfBel(bel));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
if (ci->cluster != ClusterId()) {
|
|
// Defer processing of macro clusters
|
|
int c_idx = cluster2idx(ci->cluster);
|
|
if (c_idx >= int(macros.size())) {
|
|
macros.emplace_back();
|
|
macros.back().root = ctx->getClusterRootCell(ci->cluster);
|
|
}
|
|
auto &m = macros.at(c_idx);
|
|
Loc delta = ctx->getClusterOffset(ci);
|
|
m.cells[ClusterGroupKey(delta.x, delta.y, cell_group)].push_back(ci);
|
|
} else {
|
|
// Non-clustered cells can be processed already
|
|
int idx = add_cell(rect, cell_group, rand_loc(), ci);
|
|
ci->udata = idx;
|
|
auto &mc = mcells.at(idx);
|
|
mc.pin_count += int(ci->ports.size());
|
|
if (ci->bel != BelId()) {
|
|
// Currently; treat all ready-placed cells as fixed (eventually we might do incremental ripups
|
|
// here...)
|
|
Loc loc = ctx->getBelLocation(ci->bel);
|
|
mc.pos.x = loc.x + 0.5;
|
|
mc.pos.y = loc.y + 0.5;
|
|
mc.is_fixed = true;
|
|
}
|
|
}
|
|
}
|
|
// Process clustered cells
|
|
for (int i = 0; i < int(macros.size()); i++) {
|
|
auto &m = macros.at(i);
|
|
for (auto &kv : m.cells) {
|
|
const auto &g = cfg.cell_groups.at(kv.first.group);
|
|
// Only treat zero-area cells as zero-area; if this cluster also contains non-zero area cells
|
|
bool has_nonzero = std::any_of(kv.second.begin(), kv.second.end(),
|
|
[&](const CellInfo *ci) { return !g.zero_area_cells.count(ci->type); });
|
|
StaticRect cluster_size;
|
|
for (auto ci : kv.second) {
|
|
if (has_nonzero && g.zero_area_cells.count(ci->type))
|
|
continue;
|
|
// Compute an equivalent-area stacked rectange for cells in this cluster group.
|
|
// There are probably some ugly cases this handles badly.
|
|
StaticRect r = g.cell_area.at(ci->type);
|
|
if (r.w > r.h) {
|
|
// Long and thin, "stack" vertically
|
|
// Compute height we add to stack
|
|
if (cluster_size.w < r.w) {
|
|
cluster_size.h *= (cluster_size.w / r.w);
|
|
cluster_size.w = r.w;
|
|
}
|
|
cluster_size.h += ((r.w * r.h) / cluster_size.w);
|
|
} else {
|
|
// "stack" horizontally
|
|
if (cluster_size.h < r.h) {
|
|
cluster_size.w *= (cluster_size.h / r.h);
|
|
cluster_size.h = r.h;
|
|
}
|
|
cluster_size.w += ((r.w * r.h) / cluster_size.h);
|
|
}
|
|
}
|
|
// Now add the moveable cell
|
|
if (cluster_size.area() > 0) {
|
|
int idx = add_cell(cluster_size, kv.first.group, rand_loc(), kv.second.front());
|
|
auto &mc = mcells.at(idx);
|
|
if (kv.second.front()->bel != BelId()) {
|
|
// Currently; treat all ready-placed cells as fixed (eventually we might do incremental ripups
|
|
// here...)
|
|
Loc loc = ctx->getBelLocation(kv.second.front()->bel);
|
|
mc.pos.x = loc.x + 0.5;
|
|
mc.pos.y = loc.y + 0.5;
|
|
mc.is_fixed = true;
|
|
}
|
|
for (auto ci : kv.second) {
|
|
ci->udata = idx;
|
|
mc.pin_count += int(ci->ports.size());
|
|
}
|
|
auto &cc = ccells.at(idx);
|
|
cc.macro_idx = i;
|
|
cc.chunk_dx = kv.first.dx;
|
|
cc.chunk_dy = kv.first.dy;
|
|
m.conc_cells.push_back(idx);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
const double target_util = 0.7;
|
|
|
|
void insert_dark()
|
|
{
|
|
log_info("⌁ inserting dark nodes...\n");
|
|
for (int group = 0; group < int(groups.size()); group++) {
|
|
const auto &cg = cfg.cell_groups.at(group);
|
|
auto &g = groups.at(group);
|
|
int dark_count = 0;
|
|
for (auto tile : g.loc_area) {
|
|
if (tile.value > 0.5f)
|
|
continue;
|
|
StaticRect dark_area(1.0f, 1.0f - tile.value);
|
|
int cell_idx = add_cell(dark_area, group, RealPair(tile.x, tile.y), nullptr /*spacer*/);
|
|
mcells.at(cell_idx).is_dark = true;
|
|
++dark_count;
|
|
}
|
|
log_info("⌁ group %s inserted a total of %d dark nodes\n", ctx->nameOf(cg.name), dark_count);
|
|
}
|
|
}
|
|
|
|
void insert_spacer()
|
|
{
|
|
log_info("⌁ inserting spacers...\n");
|
|
|
|
int inserted_spacers = 0;
|
|
for (int group = 0; group < int(groups.size()); group++) {
|
|
const auto &cg = cfg.cell_groups.at(group);
|
|
const auto &g = groups.at(group);
|
|
double util = g.concrete_area / g.total_area;
|
|
log_info("⌁ group %s pre-spacer utilisation %.02f%% (target %.02f%%)\n", ctx->nameOf(cg.name),
|
|
(util * 100.0), (target_util * 100.0));
|
|
// TODO: better computation of spacer size and placement?
|
|
int spacer_count = (g.total_area * target_util - g.concrete_area) / cg.spacer_rect.area();
|
|
if (spacer_count <= 0)
|
|
continue;
|
|
while (inserted_spacers < spacer_count) {
|
|
int x = ctx->rng(width);
|
|
int y = ctx->rng(height);
|
|
// avoid placing spacers at locations with dark nodes
|
|
if (ctx->rngf(1.0) > g.loc_area.at(x, y))
|
|
continue;
|
|
add_cell(cg.spacer_rect, group, RealPair(x + ctx->rngf(1.0), y + ctx->rngf(1.0)), nullptr /*spacer*/);
|
|
++inserted_spacers;
|
|
}
|
|
}
|
|
log_info("⌁ inserted a total of %d spacers\n", inserted_spacers);
|
|
}
|
|
|
|
// TODO: dark node insertion when we have obstructions or non-rectangular placement regions
|
|
int m;
|
|
double bin_w, bin_h;
|
|
|
|
std::vector<float> cs_table_fft;
|
|
std::vector<int> work_area_fft;
|
|
|
|
void prepare_density_bins()
|
|
{
|
|
// TODO: a m x m grid follows the paper and makes the DCTs easier, but is it actually ideal for non-square
|
|
// FPGAs?
|
|
m = 1 << int(std::ceil(std::log2(std::max(width, height))));
|
|
bin_w = double(width) / m;
|
|
bin_h = double(height) / m;
|
|
|
|
for (auto &g : groups) {
|
|
g.density.reset(m, m, 0);
|
|
g.density_fft.reset(m, m, 0);
|
|
g.electro_phi.reset(m, m, 0);
|
|
g.electro_fx.reset(m, m, 0);
|
|
g.electro_fy.reset(m, m, 0);
|
|
}
|
|
cs_table_fft.resize(m * 3 / 2, 0);
|
|
work_area_fft.resize(std::round(std::sqrt(m)) + 2, 0);
|
|
work_area_fft.at(0) = 0;
|
|
}
|
|
|
|
template <typename TFunc> void iter_slithers(RealPair pos, StaticRect rect, TFunc func)
|
|
{
|
|
// compute the stamp over bins (this could probably be more efficient?)
|
|
|
|
double width = rect.w, height = rect.h;
|
|
double scaled_density = 1.0;
|
|
if (width < bin_w) {
|
|
scaled_density *= (width / bin_w);
|
|
width = bin_w;
|
|
}
|
|
if (height < bin_h) {
|
|
scaled_density *= (height / bin_h);
|
|
height = bin_h;
|
|
}
|
|
|
|
double x0 = pos.x, x1 = pos.x + width;
|
|
double y0 = pos.y, y1 = pos.y + height;
|
|
for (int y = int(y0 / bin_h); y <= int(y1 / bin_h); y++) {
|
|
for (int x = int(x0 / bin_w); x <= int(x1 / bin_w); x++) {
|
|
if (x < 0 || x >= m || y < 0 || y >= m)
|
|
continue;
|
|
double slither_w = 1.0, slither_h = 1.0;
|
|
if (y == int(y0 / bin_h)) // y slithers
|
|
slither_h = ((y + 1) * bin_h) - y0;
|
|
else if (y == int(y1 / bin_h))
|
|
slither_h = (y1 - y * bin_h);
|
|
if (x == int(x0 / bin_w)) // x slithers
|
|
slither_w = ((x + 1) * bin_w) - x0;
|
|
else if (x == int(x1 / bin_w))
|
|
slither_w = (x1 - x * bin_w);
|
|
func(x, y, scaled_density * slither_w * slither_h);
|
|
}
|
|
}
|
|
};
|
|
|
|
void compute_density(int group, bool ref)
|
|
{
|
|
auto &g = groups.at(group);
|
|
// reset
|
|
for (auto entry : g.density)
|
|
entry.value = 0;
|
|
// populate
|
|
for (int idx = 0; idx < int(mcells.size()); idx++) {
|
|
auto &mc = mcells.at(idx);
|
|
if (mc.group != group)
|
|
continue;
|
|
// scale width and height to be at least one bin (local density smoothing from the eplace paper)
|
|
// TODO: should we really do this every iteration?
|
|
|
|
auto pos = ref ? mc.ref_pos : mc.pos;
|
|
iter_slithers(pos, mc.rect, [&](int x, int y, float area) { g.density.at(x, y) += area; });
|
|
}
|
|
}
|
|
|
|
void compute_conc_density()
|
|
{
|
|
for (auto &g : groups)
|
|
g.conc_density.reset(width, height, 0);
|
|
for (int idx = 0; idx < int(ccells.size()); idx++) {
|
|
auto &mc = mcells.at(idx);
|
|
auto &g = groups.at(mc.group);
|
|
auto loc = mc.pos;
|
|
auto size = mc.rect;
|
|
|
|
for (int dy = 0; dy <= int(size.h); dy++) {
|
|
for (int dx = 0; dx <= int(size.w); dx++) {
|
|
float h = (dy == int(size.h)) ? (size.h - int(size.h)) : 1;
|
|
float w = (dx == int(size.w)) ? (size.w - int(size.w)) : 1;
|
|
if ((loc.x + dx) >= 0 && (loc.x + dx) < width && (loc.y + dy) >= 0 && (loc.y + dy) < height)
|
|
g.conc_density.at(loc.x + dx, loc.y + dy) += w * h;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void compute_overlap()
|
|
{
|
|
// populate for concrete cells only
|
|
compute_conc_density();
|
|
|
|
std::string overlap_str = "";
|
|
for (int idx = 0; idx < int(groups.size()); idx++) {
|
|
auto &g = groups.at(idx);
|
|
g.overlap = 0;
|
|
float total_area = 0;
|
|
for (auto tile : g.loc_area) {
|
|
// g.overlap += std::max<float>(0, g.conc_density.at(tile.x, tile.y) - tile.value);
|
|
g.overlap += std::max<float>(0, g.conc_density.at(tile.x, tile.y) - 1);
|
|
total_area += g.conc_density.at(tile.x, tile.y);
|
|
}
|
|
g.overlap /= std::max(1.0f, total_area);
|
|
if (!overlap_str.empty())
|
|
overlap_str += ", ";
|
|
overlap_str += stringf("%s=%.1f%%", cfg.cell_groups.at(idx).name.c_str(ctx), g.overlap * 100);
|
|
if (dump_density)
|
|
g.conc_density.write_csv(stringf("out_conc_density_%d_%d.csv", iter, idx));
|
|
}
|
|
log_info("overlap: %s\n", overlap_str.c_str());
|
|
}
|
|
|
|
void run_fft(int group)
|
|
{
|
|
// get data into form that fft wants
|
|
auto &g = groups.at(group);
|
|
for (auto entry : g.density)
|
|
g.density_fft.at(entry.x, entry.y) = entry.value;
|
|
if (fft_debug || dump_density)
|
|
g.density_fft.write_csv(stringf("out_bin_density_%d_%d.csv", iter, group));
|
|
// Based on
|
|
// https://github.com/ALIGN-analoglayout/ALIGN-public/blob/master/PlaceRouteHierFlow/EA_placer/FFT/fft.cpp
|
|
// initial DCT for coefficients
|
|
ddct2d(m, m, -1, g.density_fft.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
|
|
// postprocess coefficients
|
|
for (int x = 0; x < m; x++)
|
|
g.density_fft.at(x, 0) *= 0.5f;
|
|
for (int y = 0; y < m; y++)
|
|
g.density_fft.at(0, y) *= 0.5f;
|
|
for (int x = 0; x < m; x++)
|
|
for (int y = 0; y < m; y++)
|
|
g.density_fft.at(x, y) *= (4.0f / (m * m));
|
|
// scale inputs to IDCT for potentials and field
|
|
for (int x = 0; x < m; x++) {
|
|
float wx = pi * (x / float(m));
|
|
float wx2 = wx * wx;
|
|
for (int y = 0; y < m; y++) {
|
|
float wy = pi * (y / float(m));
|
|
float wy2 = wy * wy;
|
|
|
|
float dens = g.density_fft.at(x, y);
|
|
float phi = 0, ex = 0, ey = 0;
|
|
|
|
if (x != 0 || y != 0) { // avoid divide by zero...
|
|
phi = dens / (wx2 + wy2);
|
|
ex = phi * wx;
|
|
ey = phi * wy;
|
|
}
|
|
|
|
g.electro_phi.at(x, y) = phi;
|
|
g.electro_fx.at(x, y) = ex;
|
|
g.electro_fy.at(x, y) = ey;
|
|
}
|
|
}
|
|
// IDCT for potential; 2D derivatives for field
|
|
ddct2d(m, m, 1, g.electro_phi.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
|
|
ddsct2d(m, m, 1, g.electro_fx.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
|
|
ddcst2d(m, m, 1, g.electro_fy.data(), nullptr, work_area_fft.data(), cs_table_fft.data());
|
|
if (fft_debug) {
|
|
g.electro_phi.write_csv(stringf("out_bin_phi_%d_%d.csv", iter, group));
|
|
g.electro_fx.write_csv(stringf("out_bin_ex_%d_%d.csv", iter, group));
|
|
g.electro_fy.write_csv(stringf("out_bin_ey_%d_%d.csv", iter, group));
|
|
}
|
|
}
|
|
|
|
void compute_bounds(PlacerNet &net, Axis axis, bool ref)
|
|
{
|
|
NetInfo *ni = net.ni;
|
|
auto drv_loc = cell_loc(ni->driver.cell, ref);
|
|
// seed with driver location
|
|
net.b1.at(axis) = net.b0.at(axis) = drv_loc.at(axis);
|
|
// iterate over users
|
|
for (auto &usr : ni->users) {
|
|
auto usr_loc = cell_loc(usr.cell, ref);
|
|
net.b1.at(axis) = std::max(net.b1.at(axis), usr_loc.at(axis));
|
|
net.b0.at(axis) = std::min(net.b0.at(axis), usr_loc.at(axis));
|
|
}
|
|
}
|
|
|
|
RealPair wl_coeff{0.5f, 0.5f};
|
|
|
|
void update_nets(bool ref)
|
|
{
|
|
static constexpr float min_wirelen_force = -3000.f;
|
|
pool.run(2 * nets.size(), [&](int i) {
|
|
auto &net = nets.at(i / 2);
|
|
auto axis = (i % 2) ? Axis::Y : Axis::X;
|
|
if (net.skip)
|
|
return;
|
|
net.min_exp.at(axis) = 0;
|
|
net.x_min_exp.at(axis) = 0;
|
|
net.max_exp.at(axis) = 0;
|
|
net.x_max_exp.at(axis) = 0;
|
|
// update bounding box
|
|
compute_bounds(net, axis, ref);
|
|
// compute rough center to subtract from exponents to avoid FP issues (from replace)
|
|
float c = (net.b1.at(axis) + net.b0.at(axis)) / 2.f;
|
|
for (auto &port : net.ports) {
|
|
if (!port.ref.cell)
|
|
continue;
|
|
RealPair loc = cell_loc(port.ref.cell, ref);
|
|
// update weighted-average model exponents
|
|
float emin = (c - loc.at(axis)) * wl_coeff.at(axis);
|
|
float emax = (loc.at(axis) - c) * wl_coeff.at(axis);
|
|
|
|
if (emin > min_wirelen_force) {
|
|
port.min_exp.at(axis) = std::exp(emin);
|
|
net.min_exp.at(axis) += port.min_exp.at(axis);
|
|
net.x_min_exp.at(axis) += loc.at(axis) * port.min_exp.at(axis);
|
|
} else {
|
|
port.min_exp.at(axis) = PlacerPort::invalid;
|
|
}
|
|
if (emax > min_wirelen_force) {
|
|
port.max_exp.at(axis) = std::exp(emax);
|
|
net.max_exp.at(axis) += port.max_exp.at(axis);
|
|
net.x_max_exp.at(axis) += loc.at(axis) * port.max_exp.at(axis);
|
|
} else {
|
|
port.max_exp.at(axis) = PlacerPort::invalid;
|
|
}
|
|
}
|
|
net.wa_wl.at(axis) =
|
|
(net.x_max_exp.at(axis) / net.max_exp.at(axis)) - (net.x_min_exp.at(axis) / net.min_exp.at(axis));
|
|
});
|
|
}
|
|
|
|
std::vector<std::pair<CellInfo *, RealPair>> gathered_wirelen_grad;
|
|
|
|
float wirelen_grad(CellInfo *cell, Axis axis, bool ref)
|
|
{
|
|
float gradient = 0;
|
|
if (cell->udata == -1)
|
|
return 0;
|
|
RealPair loc = cell_loc(cell, ref);
|
|
for (auto &port : cell->ports) {
|
|
NetInfo *ni = port.second.net;
|
|
if (!ni)
|
|
continue;
|
|
auto &nd = nets.at(ni->udata);
|
|
if (nd.skip)
|
|
continue;
|
|
auto &pd = nd.ports.at(port.second.type == PORT_OUT ? (nd.ports.size() - 1) : port.second.user_idx.idx());
|
|
// From Replace
|
|
// TODO: check these derivatives on paper
|
|
double d_min = 0, d_max = 0;
|
|
if (pd.has_min_exp(axis)) {
|
|
double min_sum = nd.min_exp.at(axis), x_min_sum = nd.x_min_exp.at(axis);
|
|
d_min = (min_sum * (pd.min_exp.at(axis) * (1.0f - wl_coeff.at(axis) * loc.at(axis))) +
|
|
wl_coeff.at(axis) * pd.min_exp.at(axis) * x_min_sum) /
|
|
(min_sum * min_sum);
|
|
}
|
|
if (pd.has_max_exp(axis)) {
|
|
double max_sum = nd.max_exp.at(axis), x_max_sum = nd.x_max_exp.at(axis);
|
|
d_max = (max_sum * (pd.max_exp.at(axis) * (1.0f + wl_coeff.at(axis) * loc.at(axis))) -
|
|
wl_coeff.at(axis) * pd.max_exp.at(axis) * x_max_sum) /
|
|
(max_sum * max_sum);
|
|
}
|
|
float crit = 0.0;
|
|
if (cfg.timing_driven) {
|
|
if (port.second.type == PORT_IN) {
|
|
crit = tmg.get_criticality(CellPortKey(cell->name, port.first));
|
|
} else if (port.second.type == PORT_OUT) {
|
|
if (ni && ni->users.entries() < 5) {
|
|
for (auto usr : ni->users)
|
|
crit = std::max(crit, tmg.get_criticality(CellPortKey(usr)));
|
|
}
|
|
}
|
|
}
|
|
float weight = 1.0 + 5 * std::pow(crit, 2);
|
|
gradient += weight * (d_min - d_max);
|
|
}
|
|
|
|
return gradient;
|
|
}
|
|
|
|
std::vector<float> dens_penalty;
|
|
float nesterov_a = 1.0f;
|
|
|
|
void update_gradients(bool ref = true, bool set_prev = true, bool init_penalty = false)
|
|
{
|
|
// TODO: skip non-group cells more efficiently?
|
|
pool.run(groups.size(), [&](int group) {
|
|
compute_density(group, ref);
|
|
run_fft(group);
|
|
});
|
|
update_nets(ref);
|
|
// First loop: back up gradients if required; set to zero; and compute density gradient
|
|
for (auto &cell : mcells) {
|
|
auto &g = groups.at(cell.group);
|
|
if (set_prev && ref) {
|
|
cell.last_wl_grad = cell.ref_wl_grad;
|
|
cell.last_dens_grad = cell.ref_dens_grad;
|
|
cell.last_total_grad = cell.ref_total_grad;
|
|
}
|
|
// wirelength gradient updated based on cell instances in next loop
|
|
(ref ? cell.ref_wl_grad : cell.wl_grad) = RealPair(0, 0);
|
|
// density grad based on bins - do we need to interpolate?
|
|
auto &grad = (ref ? cell.ref_dens_grad : cell.dens_grad);
|
|
grad = RealPair(0, 0);
|
|
iter_slithers((ref ? cell.ref_pos : cell.pos), cell.rect, [&](int x, int y, float area) {
|
|
grad += RealPair(g.electro_fx.at(x, y) * area, g.electro_fy.at(x, y) * area);
|
|
});
|
|
// total gradient computed at the end
|
|
(ref ? cell.ref_total_grad : cell.total_grad) = RealPair(0, 0);
|
|
}
|
|
if (gathered_wirelen_grad.empty()) {
|
|
for (auto &cell : ctx->cells) {
|
|
CellInfo *ci = cell.second.get();
|
|
if (ci->udata == -1)
|
|
continue;
|
|
gathered_wirelen_grad.emplace_back(ci, RealPair());
|
|
}
|
|
}
|
|
// Compute wirelength gradients for cells in parallel, this is a slow part
|
|
pool.run(gathered_wirelen_grad.size(), [&](int i) {
|
|
auto &entry = gathered_wirelen_grad.at(i);
|
|
CellInfo *ci = entry.first;
|
|
float wl_gx = wirelen_grad(ci, Axis::X, ref);
|
|
float wl_gy = wirelen_grad(ci, Axis::Y, ref);
|
|
entry.second = RealPair(wl_gx, wl_gy);
|
|
});
|
|
// Second loop: sum up wirelength gradients across concrete cell instances
|
|
for (auto entry : gathered_wirelen_grad) {
|
|
auto &mc = mcells.at(entry.first->udata);
|
|
(ref ? mc.ref_wl_grad : mc.wl_grad) += entry.second;
|
|
}
|
|
if (init_penalty) {
|
|
// set initial density penalty
|
|
double wirelen_sum = 0, force_sum = 0;
|
|
for (int i = 0; i < int(ccells.size()); i++) {
|
|
auto mc = mcells.at(i);
|
|
wirelen_sum += std::abs(mc.ref_wl_grad.x) + std::abs(mc.ref_wl_grad.y);
|
|
force_sum += std::abs(mc.ref_dens_grad.x) + std::abs(mc.ref_dens_grad.y);
|
|
}
|
|
const float eta = 1e-1;
|
|
float init_dens_penalty = eta * (wirelen_sum / force_sum);
|
|
log_info("initial density penalty: %f\n", init_dens_penalty);
|
|
dens_penalty.resize(groups.size(), init_dens_penalty);
|
|
update_potentials(true); // set initial potential
|
|
}
|
|
// Third loop: compute total gradient, and precondition
|
|
// TODO: ALM as well as simple penalty
|
|
for (auto &cell : mcells) {
|
|
#if 0
|
|
if (!cell.is_spacer) {
|
|
printf("%d (%f, %f) wirelen_grad: (%f,%f) density_grad: (%f,%f)\n", iter, cell.ref_pos.x,
|
|
cell.ref_pos.y, cell.ref_wl_grad.x, cell.ref_wl_grad.y, cell.ref_dens_grad.x,
|
|
cell.ref_dens_grad.y);
|
|
}
|
|
#endif
|
|
// Preconditioner from replace for now
|
|
|
|
float precond = std::max(1.0f, float(cell.pin_count) + dens_penalty[cell.group] * cell.rect.area());
|
|
if (ref) {
|
|
cell.ref_total_grad =
|
|
((cell.ref_wl_grad * -1) - cell.ref_dens_grad * dens_penalty[cell.group]) / precond;
|
|
} else {
|
|
cell.total_grad = ((cell.wl_grad * -1) - cell.dens_grad * dens_penalty[cell.group]) / precond;
|
|
}
|
|
}
|
|
}
|
|
|
|
float steplen = 0.01;
|
|
|
|
float get_steplen()
|
|
{
|
|
float coord_dist = 0;
|
|
float grad_dist = 0;
|
|
int n = 0;
|
|
for (auto &cell : mcells) {
|
|
if (cell.is_fixed || cell.is_dark)
|
|
continue;
|
|
coord_dist += (cell.ref_pos.x - cell.last_ref_pos.x) * (cell.ref_pos.x - cell.last_ref_pos.x);
|
|
coord_dist += (cell.ref_pos.y - cell.last_ref_pos.y) * (cell.ref_pos.y - cell.last_ref_pos.y);
|
|
grad_dist +=
|
|
(cell.ref_total_grad.x - cell.last_total_grad.x) * (cell.ref_total_grad.x - cell.last_total_grad.x);
|
|
grad_dist +=
|
|
(cell.ref_total_grad.y - cell.last_total_grad.y) * (cell.ref_total_grad.y - cell.last_total_grad.y);
|
|
n++;
|
|
}
|
|
coord_dist = std::sqrt(coord_dist / (2 * float(n)));
|
|
grad_dist = std::sqrt(grad_dist / (2 * float(n)));
|
|
log_info("coord_dist: %f grad_dist: %f\n", coord_dist, grad_dist);
|
|
return coord_dist / grad_dist;
|
|
// return 0.1;
|
|
}
|
|
|
|
float system_hpwl()
|
|
{
|
|
float hpwl = 0;
|
|
for (auto &net : nets) {
|
|
if (net.skip)
|
|
continue;
|
|
// update bounding box
|
|
compute_bounds(net, Axis::X, false);
|
|
compute_bounds(net, Axis::Y, false);
|
|
hpwl += net.b1.x - net.b0.x;
|
|
hpwl += net.b1.y - net.b0.y;
|
|
}
|
|
return hpwl;
|
|
}
|
|
|
|
void update_potentials(bool init = false)
|
|
{
|
|
for (auto &group : groups)
|
|
group.curr_potential = 0;
|
|
for (auto &cell : mcells) {
|
|
auto &g = groups.at(cell.group);
|
|
iter_slithers(cell.ref_pos, cell.rect,
|
|
[&](int x, int y, float area) { g.curr_potential += g.electro_phi.at(x, y) * area; });
|
|
}
|
|
if (init) {
|
|
for (auto &group : groups)
|
|
group.init_potential = group.curr_potential;
|
|
}
|
|
}
|
|
|
|
float system_potential()
|
|
{
|
|
float pot = 0;
|
|
for (auto &group : groups)
|
|
pot += group.curr_potential;
|
|
return pot;
|
|
}
|
|
|
|
float penalty_beta = 2.0e3f;
|
|
float alpha_l = 1.05f, alpha_h = 1.06f;
|
|
double penalty_incr = alpha_h - 1;
|
|
void update_penalties()
|
|
{
|
|
float pot_norm = 0;
|
|
// compute L2-norm of relative system potential
|
|
std::vector<float> rel_pot;
|
|
for (int g = 0; g < int(groups.size()); g++) {
|
|
auto &group = groups.at(g);
|
|
if (!group.enabled)
|
|
continue;
|
|
float phi_hat = group.curr_potential / group.init_potential;
|
|
rel_pot.push_back(phi_hat);
|
|
pot_norm += phi_hat * phi_hat;
|
|
}
|
|
pot_norm = sqrt(pot_norm);
|
|
log_info("pot_norm: %f\n", pot_norm);
|
|
// update penalty multiplier (ELFPlace equation 22)
|
|
double log_term = std::log(penalty_beta * pot_norm + 1);
|
|
penalty_incr = penalty_incr * ((log_term / (log_term + 1)) * (alpha_h - alpha_l) + alpha_l);
|
|
// update density penalties (ELFPlace equation 21)
|
|
for (int g = 0; g < int(groups.size()); g++) {
|
|
if (!groups.at(g).enabled)
|
|
continue;
|
|
float next_penalty = dens_penalty.at(g) + (penalty_incr * (rel_pot.at(g) / pot_norm));
|
|
dens_penalty.at(g) = next_penalty;
|
|
}
|
|
}
|
|
|
|
void initialise()
|
|
{
|
|
float initial_steplength = 0.01f;
|
|
// Update current and previous gradients with initial solution
|
|
for (auto &cell : mcells) {
|
|
cell.ref_pos = cell.pos;
|
|
}
|
|
while (true) {
|
|
update_gradients(true, true, /* init_penalty */ true);
|
|
// compute a "fake" previous position based on an arbitrary steplength and said gradients for nesterov
|
|
for (auto &cell : mcells) {
|
|
if (cell.is_fixed || cell.is_dark)
|
|
continue;
|
|
// save current position in last_pos
|
|
cell.last_pos = cell.pos;
|
|
cell.last_ref_pos = cell.ref_pos;
|
|
// compute previous position; but store it in current for gradient computation
|
|
cell.ref_pos = cell.pos - cell.ref_total_grad * initial_steplength;
|
|
}
|
|
// Compute the previous gradients (albeit into the current state fields)
|
|
update_gradients(true);
|
|
// Now we have the fake previous state in the current state
|
|
for (auto &cell : mcells) {
|
|
if (cell.is_fixed || cell.is_dark)
|
|
continue;
|
|
std::swap(cell.last_ref_pos, cell.ref_pos);
|
|
std::swap(cell.ref_total_grad, cell.last_total_grad);
|
|
std::swap(cell.ref_wl_grad, cell.last_wl_grad);
|
|
std::swap(cell.ref_dens_grad, cell.last_dens_grad);
|
|
}
|
|
float next_steplen = get_steplen();
|
|
log_info("initial steplen=%f next steplen = %f\n", initial_steplength, next_steplen);
|
|
if (next_steplen != 0 && std::isfinite(next_steplen) && std::abs(next_steplen) < 1e10) {
|
|
break;
|
|
} else {
|
|
initial_steplength *= 10;
|
|
}
|
|
}
|
|
update_timing();
|
|
}
|
|
|
|
RealPair clamp_loc(RealPair loc)
|
|
{
|
|
return RealPair(std::max<float>(0, std::min<float>(width - 1, loc.x)),
|
|
std::max<float>(0, std::min<float>(height - 1, loc.y)));
|
|
}
|
|
|
|
void update_chains()
|
|
{
|
|
// Move the post-solve position of a chain towards be the weighted average of its constituents
|
|
// The strength increases with iterations
|
|
float alpha = std::min<float>(std::pow(1.002f, iter) - 1, 1.0f);
|
|
float dist = 0;
|
|
for (int i = 0; i < int(macros.size()); i++) {
|
|
auto ¯o = macros.at(i);
|
|
float total_area = 0;
|
|
const float area_epsilon = 0.05;
|
|
RealPair pos(0, 0), ref_pos(0, 0);
|
|
for (int c : macro.conc_cells) {
|
|
auto &mc = mcells.at(c);
|
|
float a = std::max(mc.rect.area(), area_epsilon);
|
|
pos += mc.pos * a;
|
|
ref_pos += mc.ref_pos * a;
|
|
total_area += a;
|
|
}
|
|
pos /= total_area;
|
|
ref_pos /= total_area;
|
|
for (int c : macro.conc_cells) {
|
|
auto &cc = ccells.at(c);
|
|
auto &mc = mcells.at(c);
|
|
auto last_pos = mc.pos;
|
|
mc.pos = mc.pos * (1 - alpha) + (pos + RealPair(cc.chunk_dx, cc.chunk_dy)) * alpha;
|
|
mc.ref_pos = mc.ref_pos * (1 - alpha) + (ref_pos + RealPair(cc.chunk_dx, cc.chunk_dy)) * alpha;
|
|
dist += std::sqrt(std::pow(last_pos.x - mc.pos.x, 2) + std::pow(last_pos.y - mc.pos.y, 2));
|
|
}
|
|
}
|
|
log_info(" update_chains distance %.2f\n", dist);
|
|
}
|
|
|
|
void step()
|
|
{
|
|
// TODO: update penalties; wirelength factor; etc
|
|
steplen = get_steplen();
|
|
std::string penalty_str = "";
|
|
for (auto p : dens_penalty) {
|
|
penalty_str += stringf("%s%.2f", penalty_str.empty() ? "" : ", ", p);
|
|
}
|
|
log_info("iter=%d steplen=%f a=%f penalty=[%s]\n", iter, steplen, nesterov_a, penalty_str.c_str());
|
|
float a_next = (1.0f + std::sqrt(4.0f * nesterov_a * nesterov_a + 1)) / 2.0f;
|
|
// Update positions using Nesterov's
|
|
for (auto &cell : mcells) {
|
|
if (cell.is_fixed || cell.is_dark)
|
|
continue;
|
|
// save current position in last_pos
|
|
cell.last_ref_pos = cell.ref_pos;
|
|
cell.last_pos = cell.pos;
|
|
// compute new position
|
|
cell.pos = clamp_loc(cell.ref_pos - cell.ref_total_grad * steplen);
|
|
// compute reference position
|
|
cell.ref_pos = clamp_loc(cell.pos + (cell.pos - cell.last_pos) * ((nesterov_a - 1) / a_next));
|
|
}
|
|
nesterov_a = a_next;
|
|
update_chains();
|
|
update_gradients(true);
|
|
update_potentials();
|
|
log_info(" system potential: %f hpwl: %f\n", system_potential(), system_hpwl());
|
|
compute_overlap();
|
|
if ((iter % 10) == 0)
|
|
update_timing();
|
|
}
|
|
|
|
void update_timing()
|
|
{
|
|
if (!cfg.timing_driven)
|
|
return;
|
|
for (auto &net : nets) {
|
|
NetInfo *ni = net.ni;
|
|
if (ni->driver.cell == nullptr)
|
|
continue;
|
|
RealPair drv_loc = cell_loc(ni->driver.cell, false);
|
|
for (auto usr : ni->users.enumerate()) {
|
|
RealPair usr_loc = cell_loc(usr.value.cell, false);
|
|
delay_t est_delay = cfg.timing_c + cfg.timing_mx * std::abs(drv_loc.x - usr_loc.x) +
|
|
cfg.timing_my * std::abs(drv_loc.y - usr_loc.y);
|
|
tmg.set_route_delay(CellPortKey(usr.value), DelayPair(est_delay));
|
|
}
|
|
}
|
|
tmg.run(false);
|
|
}
|
|
|
|
void legalise_step(bool dsp_bram)
|
|
{
|
|
// assume DSP and BRAM are all groups 2+ for now
|
|
for (int i = 0; i < int(ccells.size()); i++) {
|
|
auto &mc = mcells.at(i);
|
|
auto &cc = ccells.at(i);
|
|
if (dsp_bram && mc.group < 2)
|
|
continue;
|
|
if (!dsp_bram && mc.group >= 2)
|
|
continue;
|
|
if (cc.macro_idx != -1 && i != macros.at(cc.macro_idx).root->udata)
|
|
continue; // not macro root
|
|
if (mc.is_fixed) { // already placed
|
|
NPNR_ASSERT(cc.base_cell->bel != BelId());
|
|
continue;
|
|
}
|
|
enqueue_legalise(i);
|
|
}
|
|
// in the DSP/BRAM step, also merge any cells outside of a group (global buffers, other random crud/singletons)
|
|
for (auto &cell : ctx->cells) {
|
|
if (cell.second->udata == -1)
|
|
enqueue_legalise(cell.second.get());
|
|
}
|
|
log_info("Strict legalising %d cells...\n", int(to_legalise.size()));
|
|
float pre_hpwl = system_hpwl();
|
|
legalise_placement_strict(true);
|
|
update_nets(true);
|
|
float post_hpwl = system_hpwl();
|
|
log_info("HPWL after legalise: %f (delta: %f)\n", post_hpwl, post_hpwl - pre_hpwl);
|
|
}
|
|
|
|
void enqueue_legalise(int cell_idx)
|
|
{
|
|
NPNR_ASSERT(cell_idx < int(ccells.size())); // we should never be legalising spacers or dark nodes
|
|
auto &ccell = ccells.at(cell_idx);
|
|
if (ccell.macro_idx != -1) {
|
|
// is a macro
|
|
auto ¯o = macros.at(ccell.macro_idx);
|
|
to_legalise.emplace(int(macro.cells.size()), macro.root->name);
|
|
} else {
|
|
to_legalise.emplace(1, ccell.base_cell->name);
|
|
}
|
|
}
|
|
|
|
void enqueue_legalise(CellInfo *ci)
|
|
{
|
|
if (ci->udata != -1) {
|
|
// managed by static
|
|
enqueue_legalise(ci->udata);
|
|
} else {
|
|
// special case
|
|
to_legalise.emplace(1, ci->name);
|
|
}
|
|
}
|
|
// Strict placement legalisation, performed after the initial HeAP spreading
|
|
void legalise_placement_strict(bool require_validity = true)
|
|
{
|
|
// At the moment we don't follow the full HeAP algorithm using cuts for legalisation, instead using
|
|
// the simple greedy largest-macro-first approach.
|
|
int ripup_radius = 2;
|
|
int total_iters = 0;
|
|
int total_iters_noreset = 0;
|
|
while (!to_legalise.empty()) {
|
|
auto top = to_legalise.top();
|
|
to_legalise.pop();
|
|
|
|
CellInfo *ci = ctx->cells.at(top.second).get();
|
|
// Was now placed, ignore
|
|
if (ci->bel != BelId())
|
|
continue;
|
|
// log_info(" Legalising %s (%s) %d\n", top.second.c_str(ctx), ci->type.c_str(ctx), top.first);
|
|
FastBels::FastBelsData *fb;
|
|
fast_bels.getBelsForCellType(ci->type, &fb);
|
|
int radius = 0;
|
|
int iter = 0;
|
|
int iter_at_radius = 0;
|
|
bool placed = false;
|
|
BelId bestBel;
|
|
int best_inp_len = std::numeric_limits<int>::max();
|
|
|
|
total_iters++;
|
|
total_iters_noreset++;
|
|
if (total_iters > int(ccells.size())) {
|
|
total_iters = 0;
|
|
ripup_radius = std::min(std::max(width + 1, height + 1), ripup_radius * 2);
|
|
}
|
|
|
|
if (total_iters_noreset > std::max(5000, 8 * int(ctx->cells.size()))) {
|
|
log_error("Unable to find legal placement for all cells, design is probably at utilisation limit.\n");
|
|
}
|
|
|
|
while (!placed) {
|
|
// Determine a search radius around the solver location (which increases over time) that is clamped to
|
|
// the region constraint for the cell (if applicable)
|
|
int rx = radius, ry = radius;
|
|
|
|
// Pick a random X and Y location within our search radius
|
|
int cx, cy;
|
|
if (ci->udata == -1) {
|
|
cx = width / 2;
|
|
cy = height / 2;
|
|
} else {
|
|
cx = int(mcells.at(ci->udata).pos.x);
|
|
cy = int(mcells.at(ci->udata).pos.y);
|
|
}
|
|
int nx = ctx->rng(2 * rx + 1) + std::max(cx - rx, 0);
|
|
int ny = ctx->rng(2 * ry + 1) + std::max(cy - ry, 0);
|
|
|
|
iter++;
|
|
iter_at_radius++;
|
|
if (iter >= (10 * (radius + 1))) {
|
|
// No luck yet, increase radius
|
|
radius = std::min(std::max(width + 1, height + 1), radius + 1);
|
|
while (radius < std::max(width + 1, height + 1)) {
|
|
// Keep increasing the radius until it will actually increase the number of cells we are
|
|
// checking (e.g. BRAM and DSP will not be in all cols/rows), so we don't waste effort
|
|
for (int x = std::max(0, cx - radius); x <= std::min(width + 1, cx + radius); x++) {
|
|
if (x >= int(fb->size()))
|
|
break;
|
|
for (int y = std::max(0, cy - radius); y <= std::min(height + 1, cy + radius); y++) {
|
|
if (y >= int(fb->at(x).size()))
|
|
break;
|
|
if (fb->at(x).at(y).size() > 0)
|
|
goto notempty;
|
|
}
|
|
}
|
|
radius = std::min(std::max(width + 1, height + 1), radius + 1);
|
|
}
|
|
notempty:
|
|
iter_at_radius = 0;
|
|
iter = 0;
|
|
}
|
|
// If our randomly chosen cooridnate is out of bounds; or points to a tile with no relevant bels; ignore
|
|
// it
|
|
if (nx < 0 || nx > width + 1)
|
|
continue;
|
|
if (ny < 0 || ny > height + 1)
|
|
continue;
|
|
|
|
if (nx >= int(fb->size()))
|
|
continue;
|
|
if (ny >= int(fb->at(nx).size()))
|
|
continue;
|
|
if (fb->at(nx).at(ny).empty())
|
|
continue;
|
|
|
|
// The number of attempts to find a location to try
|
|
int need_to_explore = 2 * radius;
|
|
|
|
// If we have found at least one legal location; and made enough attempts; assume it's good enough and
|
|
// finish
|
|
if (iter_at_radius >= need_to_explore && bestBel != BelId()) {
|
|
CellInfo *bound = ctx->getBoundBelCell(bestBel);
|
|
if (bound != nullptr) {
|
|
ctx->unbindBel(bound->bel);
|
|
enqueue_legalise(bound);
|
|
}
|
|
ctx->bindBel(bestBel, ci, STRENGTH_WEAK);
|
|
placed = true;
|
|
Loc loc = ctx->getBelLocation(bestBel);
|
|
if (ci->udata != -1) {
|
|
auto &mc = mcells.at(ci->udata);
|
|
mc.pos = mc.ref_pos = RealPair(loc, 0.5);
|
|
mc.is_fixed = true;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (ci->cluster == ClusterId()) {
|
|
// The case where we have no relative constraints
|
|
for (auto sz : fb->at(nx).at(ny)) {
|
|
// Look through all bels in this tile; checking region constraint if applicable
|
|
if (!ci->testRegion(sz))
|
|
continue;
|
|
// Prefer available bels; unless we are dealing with a wide radius (e.g. difficult control sets)
|
|
// or occasionally trigger a tiebreaker
|
|
if (ctx->checkBelAvail(sz) || (radius > ripup_radius || ctx->rng(20000) < 10)) {
|
|
CellInfo *bound = ctx->getBoundBelCell(sz);
|
|
if (bound != nullptr) {
|
|
// Only rip up cells without constraints
|
|
if (bound->cluster != ClusterId())
|
|
continue;
|
|
ctx->unbindBel(bound->bel);
|
|
}
|
|
// Provisionally bind the bel
|
|
ctx->bindBel(sz, ci, STRENGTH_WEAK);
|
|
if (require_validity && !ctx->isBelLocationValid(sz)) {
|
|
// New location is not legal; unbind the cell (and rebind the cell we ripped up if
|
|
// applicable)
|
|
ctx->unbindBel(sz);
|
|
if (bound != nullptr)
|
|
ctx->bindBel(sz, bound, STRENGTH_WEAK);
|
|
} else if (iter_at_radius < need_to_explore) {
|
|
// It's legal, but we haven't tried enough locations yet
|
|
ctx->unbindBel(sz);
|
|
if (bound != nullptr)
|
|
ctx->bindBel(sz, bound, STRENGTH_WEAK);
|
|
int input_len = 0;
|
|
// Compute a fast input wirelength metric at this bel; and save if better than our last
|
|
// try
|
|
for (auto &port : ci->ports) {
|
|
auto &p = port.second;
|
|
if (p.type != PORT_IN || p.net == nullptr || p.net->driver.cell == nullptr)
|
|
continue;
|
|
CellInfo *drv = p.net->driver.cell;
|
|
if (drv->udata == -1)
|
|
continue;
|
|
auto drv_loc = mcells.at(drv->udata);
|
|
input_len += std::abs(int(drv_loc.pos.x) - nx) + std::abs(int(drv_loc.pos.y) - ny);
|
|
}
|
|
if (input_len < best_inp_len) {
|
|
best_inp_len = input_len;
|
|
bestBel = sz;
|
|
}
|
|
break;
|
|
} else {
|
|
// It's legal, and we've tried enough. Finish.
|
|
if (bound != nullptr)
|
|
enqueue_legalise(bound);
|
|
Loc loc = ctx->getBelLocation(sz);
|
|
if (ci->udata != -1) {
|
|
auto &mc = mcells.at(ci->udata);
|
|
mc.pos = mc.ref_pos = RealPair(loc, 0.5);
|
|
mc.is_fixed = true;
|
|
}
|
|
placed = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// We do have relative constraints
|
|
for (auto sz : fb->at(nx).at(ny)) {
|
|
// List of cells and their destination
|
|
std::vector<std::pair<CellInfo *, BelId>> targets;
|
|
// List of bels we placed things at; and the cell that was there before if applicable
|
|
std::vector<std::pair<BelId, CellInfo *>> swaps_made;
|
|
|
|
if (!ctx->getClusterPlacement(ci->cluster, sz, targets))
|
|
continue;
|
|
|
|
for (auto &target : targets) {
|
|
// Check it satisfies the region constraint if applicable
|
|
if (!target.first->testRegion(target.second))
|
|
goto fail;
|
|
CellInfo *bound = ctx->getBoundBelCell(target.second);
|
|
// Chains cannot overlap; so if we have to ripup a cell make sure it isn't part of a chain
|
|
if (bound != nullptr)
|
|
if (bound->cluster != ClusterId() || bound->belStrength > STRENGTH_WEAK)
|
|
goto fail;
|
|
}
|
|
// Actually perform the move; keeping track of the moves we make so we can revert them if needed
|
|
for (auto &target : targets) {
|
|
CellInfo *bound = ctx->getBoundBelCell(target.second);
|
|
if (bound != nullptr)
|
|
ctx->unbindBel(target.second);
|
|
ctx->bindBel(target.second, target.first, STRENGTH_STRONG);
|
|
swaps_made.emplace_back(target.second, bound);
|
|
}
|
|
// Check that the move we have made is legal
|
|
for (auto &sm : swaps_made) {
|
|
if (!ctx->isBelLocationValid(sm.first))
|
|
goto fail;
|
|
}
|
|
|
|
if (false) {
|
|
fail:
|
|
// If the move turned out to be illegal; revert all the moves we made
|
|
for (auto &swap : swaps_made) {
|
|
ctx->unbindBel(swap.first);
|
|
if (swap.second != nullptr)
|
|
ctx->bindBel(swap.first, swap.second, STRENGTH_WEAK);
|
|
}
|
|
continue;
|
|
}
|
|
for (auto &target : targets) {
|
|
Loc loc = ctx->getBelLocation(target.second);
|
|
if (ci->udata != -1) {
|
|
auto &mc = mcells.at(target.first->udata);
|
|
mc.pos = mc.ref_pos = RealPair(loc, 0.5);
|
|
mc.is_fixed = true;
|
|
}
|
|
// log_info("%s %d %d %d\n", target.first->name.c_str(ctx), loc.x, loc.y, loc.z);
|
|
}
|
|
for (auto &swap : swaps_made) {
|
|
// Where we have ripped up cells; add them to the queue
|
|
if (swap.second != nullptr)
|
|
enqueue_legalise(swap.second);
|
|
}
|
|
|
|
placed = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
public:
|
|
StaticPlacer(Context *ctx, PlacerStaticCfg cfg)
|
|
: ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx), pool(ctx->setting<int>("threads", 8))
|
|
{
|
|
groups.resize(cfg.cell_groups.size());
|
|
tmg.setup_only = true;
|
|
tmg.setup();
|
|
dump_density = ctx->setting<bool>("static/dump_density", false);
|
|
};
|
|
void place()
|
|
{
|
|
log_info("Running Static placer...\n");
|
|
init_bels();
|
|
prepare_cells();
|
|
init_cells();
|
|
init_nets();
|
|
insert_dark();
|
|
insert_spacer();
|
|
|
|
prepare_density_bins();
|
|
initialise();
|
|
bool legalised_ip = false;
|
|
while (true) {
|
|
step();
|
|
for (auto &p : dens_penalty)
|
|
if (p < 50.0)
|
|
p *= 1.025;
|
|
else
|
|
p += 1.0;
|
|
// wl_coeff.at(Axis::X) = std::max(0.005, 0.995 * wl_coeff.at(Axis::X));
|
|
// wl_coeff.at(Axis::Y) = std::max(0.005, 0.995 * wl_coeff.at(Axis::Y));
|
|
|
|
// update_penalties();
|
|
if (!legalised_ip) {
|
|
float ip_overlap = 0;
|
|
for (int i = cfg.logic_groups; i < int(groups.size()); i++)
|
|
ip_overlap = std::max(ip_overlap, groups.at(i).overlap);
|
|
if (ip_overlap < 0.15) {
|
|
legalise_step(true);
|
|
legalised_ip = true;
|
|
for (int i = cfg.logic_groups; i < int(groups.size()); i++)
|
|
groups.at(i).enabled = false;
|
|
}
|
|
} else {
|
|
float logic_overlap = 0;
|
|
for (int i = 0; i < cfg.logic_groups; i++)
|
|
logic_overlap = std::max(logic_overlap, groups.at(i).overlap);
|
|
if (logic_overlap < 0.1) {
|
|
legalise_step(false);
|
|
break;
|
|
}
|
|
}
|
|
++iter;
|
|
}
|
|
{
|
|
auto placer1_cfg = Placer1Cfg(ctx);
|
|
placer1_cfg.hpwl_scale_x = cfg.hpwl_scale_x;
|
|
placer1_cfg.hpwl_scale_y = cfg.hpwl_scale_y;
|
|
placer1_refine(ctx, placer1_cfg);
|
|
}
|
|
}
|
|
};
|
|
}; // namespace
|
|
|
|
bool placer_static(Context *ctx, PlacerStaticCfg cfg)
|
|
{
|
|
StaticPlacer(ctx, cfg).place();
|
|
return true;
|
|
}
|
|
|
|
PlacerStaticCfg::PlacerStaticCfg(Context *ctx)
|
|
{
|
|
timing_driven = ctx->setting<bool>("timing_driven");
|
|
|
|
hpwl_scale_x = 1;
|
|
hpwl_scale_y = 1;
|
|
}
|
|
|
|
NEXTPNR_NAMESPACE_END
|