HeAP: Initial infrastructure
Signed-off-by: David Shah <dave@ds0.me>
This commit is contained in:
parent
4cf8549f5c
commit
e36460b83e
366
common/placer_heap.cc
Normal file
366
common/placer_heap.cc
Normal file
@ -0,0 +1,366 @@
|
||||
/*
|
||||
* nextpnr -- Next Generation Place and Route
|
||||
*
|
||||
* Copyright (C) 2019 David Shah <david@symbioticeda.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and/or distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* [[cite]] HeAP
|
||||
* Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson
|
||||
* https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf
|
||||
*
|
||||
* [[cite]] SimPL
|
||||
* SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov
|
||||
* http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf
|
||||
*/
|
||||
|
||||
#include <deque>
|
||||
#include <unordered_map>
|
||||
#include "log.h"
|
||||
#include "nextpnr.h"
|
||||
#include "place_common.h"
|
||||
#include "util.h"
|
||||
|
||||
NEXTPNR_NAMESPACE_BEGIN
|
||||
|
||||
namespace {
|
||||
// A simple internal representation for a sparse system of equations Ax = rhs
|
||||
// This is designed to decouple the functions that build the matrix to the engine that
|
||||
// solves it, and the representation that requires
|
||||
template <typename T> struct EquationSystem
|
||||
{
|
||||
EquationSystem(size_t rows, size_t cols)
|
||||
{
|
||||
A.resize(cols);
|
||||
rhs.resize(rows);
|
||||
}
|
||||
|
||||
// Simple sparse format, easy to convert to CCS for solver
|
||||
std::vector<std::vector<std::pair<int, T>>> A; // col -> (row, x[row, col]) sorted by row
|
||||
std::vector<T> rhs; // RHS vector
|
||||
|
||||
void reset()
|
||||
{
|
||||
for (auto &col : A)
|
||||
col.clear();
|
||||
std::fill(rhs.begin(), rhs.end(), T());
|
||||
}
|
||||
|
||||
void add_coeff(int row, int col, T val)
|
||||
{
|
||||
auto &Ac = A[col];
|
||||
// Binary search
|
||||
int b = 0, e = int(Ac.size()) - 1;
|
||||
while (b <= e) {
|
||||
int i = (b + e) / 2;
|
||||
if (Ac[i].first == row) {
|
||||
Ac[i].second += val;
|
||||
return;
|
||||
}
|
||||
if (Ac[i].first > row)
|
||||
e = i - 1;
|
||||
else
|
||||
b = i + 1;
|
||||
}
|
||||
Ac.insert(Ac.begin() + b, std::make_pair(row, val));
|
||||
}
|
||||
|
||||
void add_rhs(int row, T val) { rhs[row] += val; }
|
||||
};
|
||||
} // namespace
|
||||
|
||||
class HeAPPlacer
|
||||
{
|
||||
public:
|
||||
HeAPPlacer(Context *ctx) : ctx(ctx) {}
|
||||
|
||||
private:
|
||||
Context *ctx;
|
||||
|
||||
int diameter, max_x, max_y;
|
||||
std::vector<std::vector<std::vector<std::vector<BelId>>>> fast_bels;
|
||||
std::unordered_map<IdString, std::tuple<int, int>> bel_types;
|
||||
|
||||
// For fast handling of heterogeneosity during initial placement without full legalisation,
|
||||
// for each Bel type this goes from x or y to the nearest x or y where a Bel of a given type exists
|
||||
// This is particularly important for the iCE40 architecture, where multipliers and BRAM only exist at the
|
||||
// edges and corners respectively
|
||||
std::vector<std::vector<int>> nearest_row_with_bel;
|
||||
std::vector<std::vector<int>> nearest_col_with_bel;
|
||||
|
||||
// In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom
|
||||
// structure instead
|
||||
struct CellLocation
|
||||
{
|
||||
int x, y;
|
||||
bool locked, global;
|
||||
};
|
||||
std::unordered_map<IdString, CellLocation> cell_locs;
|
||||
// The set of cells that we will actually place. This excludes locked cells and children cells of macros/chains
|
||||
// (only the root of each macro is placed.)
|
||||
std::vector<CellInfo *> place_cells;
|
||||
|
||||
// Place cells with the BEL attribute set to constrain them
|
||||
void place_constraints()
|
||||
{
|
||||
size_t placed_cells = 0;
|
||||
// Initial constraints placer
|
||||
for (auto &cell_entry : ctx->cells) {
|
||||
CellInfo *cell = cell_entry.second.get();
|
||||
auto loc = cell->attrs.find(ctx->id("BEL"));
|
||||
if (loc != cell->attrs.end()) {
|
||||
std::string loc_name = loc->second;
|
||||
BelId bel = ctx->getBelByName(ctx->id(loc_name));
|
||||
if (bel == BelId()) {
|
||||
log_error("No Bel named \'%s\' located for "
|
||||
"this chip (processing BEL attribute on \'%s\')\n",
|
||||
loc_name.c_str(), cell->name.c_str(ctx));
|
||||
}
|
||||
|
||||
IdString bel_type = ctx->getBelType(bel);
|
||||
if (bel_type != cell->type) {
|
||||
log_error("Bel \'%s\' of type \'%s\' does not match cell "
|
||||
"\'%s\' of type \'%s\'\n",
|
||||
loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
|
||||
}
|
||||
if (!ctx->isValidBelForCell(cell, bel)) {
|
||||
log_error("Bel \'%s\' of type \'%s\' is not valid for cell "
|
||||
"\'%s\' of type \'%s\'\n",
|
||||
loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
|
||||
}
|
||||
|
||||
auto bound_cell = ctx->getBoundBelCell(bel);
|
||||
if (bound_cell) {
|
||||
log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n",
|
||||
cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx));
|
||||
}
|
||||
|
||||
ctx->bindBel(bel, cell, STRENGTH_USER);
|
||||
placed_cells++;
|
||||
}
|
||||
}
|
||||
int constr_placed_cells = placed_cells;
|
||||
log_info("Placed %d cells based on constraints.\n", int(placed_cells));
|
||||
ctx->yield();
|
||||
}
|
||||
|
||||
// Construct the fast_bels, nearest_row_with_bel and nearest_col_with_bel
|
||||
void build_fast_bels()
|
||||
{
|
||||
|
||||
int num_bel_types = 0;
|
||||
for (auto bel : ctx->getBels()) {
|
||||
IdString type = ctx->getBelType(bel);
|
||||
if (bel_types.find(type) == bel_types.end()) {
|
||||
bel_types[type] = std::tuple<int, int>(num_bel_types++, 1);
|
||||
} else {
|
||||
std::get<1>(bel_types.at(type))++;
|
||||
}
|
||||
}
|
||||
for (auto bel : ctx->getBels()) {
|
||||
if (!ctx->checkBelAvail(bel))
|
||||
continue;
|
||||
Loc loc = ctx->getBelLocation(bel);
|
||||
IdString type = ctx->getBelType(bel);
|
||||
int type_idx = std::get<0>(bel_types.at(type));
|
||||
if (int(fast_bels.size()) < type_idx + 1)
|
||||
fast_bels.resize(type_idx + 1);
|
||||
if (int(fast_bels.at(type_idx).size()) < (loc.x + 1))
|
||||
fast_bels.at(type_idx).resize(loc.x + 1);
|
||||
if (int(fast_bels.at(type_idx).at(loc.x).size()) < (loc.y + 1))
|
||||
fast_bels.at(type_idx).at(loc.x).resize(loc.y + 1);
|
||||
max_x = std::max(max_x, loc.x);
|
||||
max_y = std::max(max_y, loc.y);
|
||||
fast_bels.at(type_idx).at(loc.x).at(loc.y).push_back(bel);
|
||||
}
|
||||
diameter = std::max(max_x, max_y) + 1;
|
||||
nearest_row_with_bel.resize(num_bel_types, std::vector<int>(max_y + 1, -1));
|
||||
nearest_col_with_bel.resize(num_bel_types, std::vector<int>(max_x + 1, -1));
|
||||
for (auto bel : ctx->getBels()) {
|
||||
if (!ctx->checkBelAvail(bel))
|
||||
continue;
|
||||
Loc loc = ctx->getBelLocation(bel);
|
||||
int type_idx = std::get<0>(bel_types.at(ctx->getBelType(bel)));
|
||||
auto &nr = nearest_row_with_bel.at(type_idx), &nc = nearest_col_with_bel.at(type_idx);
|
||||
// Traverse outwards through nearest_row_with_bel and nearest_col_with_bel, stopping once
|
||||
// another row/col is already recorded as being nearer
|
||||
for (int x = loc.x; x <= max_x; x++) {
|
||||
if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (x - loc.x))
|
||||
break;
|
||||
nc.at(x) = loc.x;
|
||||
}
|
||||
for (int x = loc.x - 1; x >= 0; x--) {
|
||||
if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (loc.x - x))
|
||||
break;
|
||||
nc.at(x) = loc.x;
|
||||
}
|
||||
for (int y = loc.y; y <= max_y; y++) {
|
||||
if (nr.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (y - loc.y))
|
||||
break;
|
||||
nr.at(y) = loc.y;
|
||||
}
|
||||
for (int y = loc.y - 1; y >= 0; y--) {
|
||||
if (nc.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (loc.y - y))
|
||||
break;
|
||||
nc.at(y) = loc.y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build up a random initial placement, without regard to legality
|
||||
// FIXME: Are there better approaches to the initial placement (e.g. greedy?)
|
||||
void seed_placement()
|
||||
{
|
||||
std::unordered_map<IdString, std::vector<BelId>> available_bels;
|
||||
for (auto bel : ctx->getBels()) {
|
||||
if (!ctx->checkBelAvail(bel))
|
||||
continue;
|
||||
available_bels[ctx->getBelType(bel)].push_back(bel);
|
||||
}
|
||||
for (auto &ab : available_bels)
|
||||
ctx->shuffle(ab.second);
|
||||
int placed_cell_count = 0;
|
||||
for (auto cell : sorted(ctx->cells)) {
|
||||
CellInfo *ci = cell.second;
|
||||
if (ci->bel != BelId()) {
|
||||
Loc loc = ctx->getBelLocation(ci->bel);
|
||||
cell_locs[cell.first].x = loc.x;
|
||||
cell_locs[cell.first].y = loc.y;
|
||||
cell_locs[cell.first].locked = true;
|
||||
cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel);
|
||||
|
||||
} else if (ci->constr_parent == nullptr) {
|
||||
if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty())
|
||||
log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx),
|
||||
ci->type.c_str(ctx));
|
||||
BelId bel = available_bels.at(ci->type).back();
|
||||
available_bels.at(ci->type).pop_back();
|
||||
Loc loc = ctx->getBelLocation(bel);
|
||||
cell_locs[cell.first].x = loc.x;
|
||||
cell_locs[cell.first].y = loc.y;
|
||||
cell_locs[cell.first].locked = false;
|
||||
cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel);
|
||||
ci->udata = placed_cell_count++;
|
||||
place_cells.push_back(ci);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update the location of all children of a chain
|
||||
void update_chain(CellInfo *cell)
|
||||
{
|
||||
const auto &base = cell_locs[cell->name];
|
||||
for (auto child : cell->constr_children) {
|
||||
if (child->constr_x != child->UNCONSTR)
|
||||
cell_locs[child->name].x = base.x + child->constr_x;
|
||||
else
|
||||
cell_locs[child->name].x = base.x; // better handling of UNCONSTR?
|
||||
if (child->constr_y != child->UNCONSTR)
|
||||
cell_locs[child->name].y = base.y + child->constr_y;
|
||||
else
|
||||
cell_locs[child->name].y = base.y; // better handling of UNCONSTR?
|
||||
if (!child->constr_children.empty())
|
||||
update_chain(child);
|
||||
}
|
||||
}
|
||||
|
||||
// Update all chains
|
||||
void update_all_chains()
|
||||
{
|
||||
for (auto cell : place_cells) {
|
||||
if (!cell->constr_children.empty())
|
||||
update_chain(cell);
|
||||
}
|
||||
}
|
||||
|
||||
// Run a function on all ports of a net - including the driver and all users
|
||||
template <typename Tf> void foreach_port(NetInfo *net, Tf func)
|
||||
{
|
||||
if (net->driver.cell != nullptr)
|
||||
func(net->driver);
|
||||
for (auto &user : net->users)
|
||||
func(user);
|
||||
}
|
||||
|
||||
// Build the system of equations for either X or Y
|
||||
void build_equations(EquationSystem<double> &es, bool yaxis)
|
||||
{
|
||||
// Return the x or y position of a cell, depending on ydir
|
||||
auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; };
|
||||
|
||||
es.reset();
|
||||
|
||||
for (auto net : sorted(ctx->nets)) {
|
||||
NetInfo *ni = net.second;
|
||||
if (ni->driver.cell == nullptr)
|
||||
continue;
|
||||
if (ni->users.empty())
|
||||
continue;
|
||||
if (cell_locs.at(ni->driver.cell->name).global)
|
||||
continue;
|
||||
// Find the bounds of the net in this axis, and the ports that correspond to these bounds
|
||||
PortRef *lbport = nullptr, *ubport = nullptr;
|
||||
int lbpos = std::numeric_limits<int>::max(), ubpos = std::numeric_limits<int>::min();
|
||||
foreach_port(ni, [&](PortRef &port) {
|
||||
int pos = cell_pos(port.cell);
|
||||
if (pos < lbpos) {
|
||||
lbpos = pos;
|
||||
lbport = &port;
|
||||
}
|
||||
if (pos > ubpos) {
|
||||
ubpos = pos;
|
||||
ubport = &port;
|
||||
}
|
||||
});
|
||||
// Add all relevant connections to the matrix
|
||||
foreach_port(ni, [&](PortRef &port) {
|
||||
int this_pos = cell_pos(port.cell);
|
||||
auto process_arc = [&](PortRef *other) {
|
||||
if (other == &port)
|
||||
return;
|
||||
int o_pos = cell_pos(other->cell);
|
||||
if (o_pos == this_pos)
|
||||
return; // FIXME: or clamp to 1?
|
||||
double weight = 1. / (ni->users.size() * std::abs(o_pos - this_pos));
|
||||
// FIXME: add criticality to weighting
|
||||
|
||||
// If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation,
|
||||
// if the other end isn't fixed
|
||||
if (!cell_locs.at(port.cell->name).locked) {
|
||||
es.add_coeff(port.cell->udata, port.cell->udata, weight);
|
||||
if (!cell_locs.at(other->cell->name).locked)
|
||||
es.add_coeff(other->cell->udata, port.cell->udata, -weight);
|
||||
} else {
|
||||
// Add our fixed position to the other end's RHS
|
||||
if (!cell_locs.at(other->cell->name).locked)
|
||||
es.add_rhs(other->cell->udata, this_pos * weight);
|
||||
}
|
||||
// Opposite for the other end of the connection
|
||||
if (!cell_locs.at(other->cell->name).locked) {
|
||||
es.add_coeff(other->cell->udata, other->cell->udata, weight);
|
||||
if (!cell_locs.at(port.cell->name).locked)
|
||||
es.add_coeff(port.cell->udata, other->cell->udata, -weight);
|
||||
} else {
|
||||
// Add our fixed position to the other end's RHS
|
||||
if (!cell_locs.at(port.cell->name).locked)
|
||||
es.add_rhs(port.cell->udata, this_pos * weight);
|
||||
}
|
||||
};
|
||||
process_arc(lbport);
|
||||
process_arc(ubport);
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
NEXTPNR_NAMESPACE_END
|
Loading…
Reference in New Issue
Block a user