HeAP: Initial infrastructure

Signed-off-by: David Shah <dave@ds0.me>
2019-01-10 11:18:47 +00:00 · 2019-01-10 11:18:47 +00:00 · e36460b83e
commit e36460b83e
parent 4cf8549f5c
1 changed files with 366 additions and 0 deletions
--- a/common/placer_heap.cc
+++ b/common/placer_heap.cc
@ -0,0 +1,366 @@
+/*
+ *  nextpnr -- Next Generation Place and Route
+ *
+ *  Copyright (C) 2019  David Shah <david@symbioticeda.com>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ *  [[cite]] HeAP
+ *  Analytical Placement for Heterogeneous FPGAs, Marcel Gort and Jason H. Anderson
+ *  https://janders.eecg.utoronto.ca/pdfs/marcelfpl12.pdf
+ *
+ *  [[cite]] SimPL
+ *  SimPL: An Effective Placement Algorithm, Myung-Chul Kim, Dong-Jin Lee and Igor L. Markov
+ *  http://www.ece.umich.edu/cse/awards/pdfs/iccad10-simpl.pdf
+ */
+
+#include <deque>
+#include <unordered_map>
+#include "log.h"
+#include "nextpnr.h"
+#include "place_common.h"
+#include "util.h"
+
+NEXTPNR_NAMESPACE_BEGIN
+
+namespace {
+// A simple internal representation for a sparse system of equations Ax = rhs
+// This is designed to decouple the functions that build the matrix to the engine that
+// solves it, and the representation that requires
+template <typename T> struct EquationSystem
+{
+    EquationSystem(size_t rows, size_t cols)
+    {
+        A.resize(cols);
+        rhs.resize(rows);
+    }
+
+    // Simple sparse format, easy to convert to CCS for solver
+    std::vector<std::vector<std::pair<int, T>>> A; // col -> (row, x[row, col]) sorted by row
+    std::vector<T> rhs;                            // RHS vector
+
+    void reset()
+    {
+        for (auto &col : A)
+            col.clear();
+        std::fill(rhs.begin(), rhs.end(), T());
+    }
+
+    void add_coeff(int row, int col, T val)
+    {
+        auto &Ac = A[col];
+        // Binary search
+        int b = 0, e = int(Ac.size()) - 1;
+        while (b <= e) {
+            int i = (b + e) / 2;
+            if (Ac[i].first == row) {
+                Ac[i].second += val;
+                return;
+            }
+            if (Ac[i].first > row)
+                e = i - 1;
+            else
+                b = i + 1;
+        }
+        Ac.insert(Ac.begin() + b, std::make_pair(row, val));
+    }
+
+    void add_rhs(int row, T val) { rhs[row] += val; }
+};
+} // namespace
+
+class HeAPPlacer
+{
+  public:
+    HeAPPlacer(Context *ctx) : ctx(ctx) {}
+
+  private:
+    Context *ctx;
+
+    int diameter, max_x, max_y;
+    std::vector<std::vector<std::vector<std::vector<BelId>>>> fast_bels;
+    std::unordered_map<IdString, std::tuple<int, int>> bel_types;
+
+    // For fast handling of heterogeneosity during initial placement without full legalisation,
+    // for each Bel type this goes from x or y to the nearest x or y where a Bel of a given type exists
+    // This is particularly important for the iCE40 architecture, where multipliers and BRAM only exist at the
+    // edges and corners respectively
+    std::vector<std::vector<int>> nearest_row_with_bel;
+    std::vector<std::vector<int>> nearest_col_with_bel;
+
+    // In some cases, we can't use bindBel because we allow overlap in the earlier stages. So we use this custom
+    // structure instead
+    struct CellLocation
+    {
+        int x, y;
+        bool locked, global;
+    };
+    std::unordered_map<IdString, CellLocation> cell_locs;
+    // The set of cells that we will actually place. This excludes locked cells and children cells of macros/chains
+    // (only the root of each macro is placed.)
+    std::vector<CellInfo *> place_cells;
+
+    // Place cells with the BEL attribute set to constrain them
+    void place_constraints()
+    {
+        size_t placed_cells = 0;
+        // Initial constraints placer
+        for (auto &cell_entry : ctx->cells) {
+            CellInfo *cell = cell_entry.second.get();
+            auto loc = cell->attrs.find(ctx->id("BEL"));
+            if (loc != cell->attrs.end()) {
+                std::string loc_name = loc->second;
+                BelId bel = ctx->getBelByName(ctx->id(loc_name));
+                if (bel == BelId()) {
+                    log_error("No Bel named \'%s\' located for "
+                              "this chip (processing BEL attribute on \'%s\')\n",
+                              loc_name.c_str(), cell->name.c_str(ctx));
+                }
+
+                IdString bel_type = ctx->getBelType(bel);
+                if (bel_type != cell->type) {
+                    log_error("Bel \'%s\' of type \'%s\' does not match cell "
+                              "\'%s\' of type \'%s\'\n",
+                              loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
+                }
+                if (!ctx->isValidBelForCell(cell, bel)) {
+                    log_error("Bel \'%s\' of type \'%s\' is not valid for cell "
+                              "\'%s\' of type \'%s\'\n",
+                              loc_name.c_str(), bel_type.c_str(ctx), cell->name.c_str(ctx), cell->type.c_str(ctx));
+                }
+
+                auto bound_cell = ctx->getBoundBelCell(bel);
+                if (bound_cell) {
+                    log_error("Cell \'%s\' cannot be bound to bel \'%s\' since it is already bound to cell \'%s\'\n",
+                              cell->name.c_str(ctx), loc_name.c_str(), bound_cell->name.c_str(ctx));
+                }
+
+                ctx->bindBel(bel, cell, STRENGTH_USER);
+                placed_cells++;
+            }
+        }
+        int constr_placed_cells = placed_cells;
+        log_info("Placed %d cells based on constraints.\n", int(placed_cells));
+        ctx->yield();
+    }
+
+    // Construct the fast_bels, nearest_row_with_bel and nearest_col_with_bel
+    void build_fast_bels()
+    {
+
+        int num_bel_types = 0;
+        for (auto bel : ctx->getBels()) {
+            IdString type = ctx->getBelType(bel);
+            if (bel_types.find(type) == bel_types.end()) {
+                bel_types[type] = std::tuple<int, int>(num_bel_types++, 1);
+            } else {
+                std::get<1>(bel_types.at(type))++;
+            }
+        }
+        for (auto bel : ctx->getBels()) {
+            if (!ctx->checkBelAvail(bel))
+                continue;
+            Loc loc = ctx->getBelLocation(bel);
+            IdString type = ctx->getBelType(bel);
+            int type_idx = std::get<0>(bel_types.at(type));
+            if (int(fast_bels.size()) < type_idx + 1)
+                fast_bels.resize(type_idx + 1);
+            if (int(fast_bels.at(type_idx).size()) < (loc.x + 1))
+                fast_bels.at(type_idx).resize(loc.x + 1);
+            if (int(fast_bels.at(type_idx).at(loc.x).size()) < (loc.y + 1))
+                fast_bels.at(type_idx).at(loc.x).resize(loc.y + 1);
+            max_x = std::max(max_x, loc.x);
+            max_y = std::max(max_y, loc.y);
+            fast_bels.at(type_idx).at(loc.x).at(loc.y).push_back(bel);
+        }
+        diameter = std::max(max_x, max_y) + 1;
+        nearest_row_with_bel.resize(num_bel_types, std::vector<int>(max_y + 1, -1));
+        nearest_col_with_bel.resize(num_bel_types, std::vector<int>(max_x + 1, -1));
+        for (auto bel : ctx->getBels()) {
+            if (!ctx->checkBelAvail(bel))
+                continue;
+            Loc loc = ctx->getBelLocation(bel);
+            int type_idx = std::get<0>(bel_types.at(ctx->getBelType(bel)));
+            auto &nr = nearest_row_with_bel.at(type_idx), &nc = nearest_col_with_bel.at(type_idx);
+            // Traverse outwards through nearest_row_with_bel and nearest_col_with_bel, stopping once
+            // another row/col is already recorded as being nearer
+            for (int x = loc.x; x <= max_x; x++) {
+                if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (x - loc.x))
+                    break;
+                nc.at(x) = loc.x;
+            }
+            for (int x = loc.x - 1; x >= 0; x--) {
+                if (nc.at(x) == -1 || std::abs(loc.x - nc.at(x)) <= (loc.x - x))
+                    break;
+                nc.at(x) = loc.x;
+            }
+            for (int y = loc.y; y <= max_y; y++) {
+                if (nr.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (y - loc.y))
+                    break;
+                nr.at(y) = loc.y;
+            }
+            for (int y = loc.y - 1; y >= 0; y--) {
+                if (nc.at(y) == -1 || std::abs(loc.y - nc.at(y)) <= (loc.y - y))
+                    break;
+                nc.at(y) = loc.y;
+            }
+        }
+    }
+
+    // Build up a random initial placement, without regard to legality
+    // FIXME: Are there better approaches to the initial placement (e.g. greedy?)
+    void seed_placement()
+    {
+        std::unordered_map<IdString, std::vector<BelId>> available_bels;
+        for (auto bel : ctx->getBels()) {
+            if (!ctx->checkBelAvail(bel))
+                continue;
+            available_bels[ctx->getBelType(bel)].push_back(bel);
+        }
+        for (auto &ab : available_bels)
+            ctx->shuffle(ab.second);
+        int placed_cell_count = 0;
+        for (auto cell : sorted(ctx->cells)) {
+            CellInfo *ci = cell.second;
+            if (ci->bel != BelId()) {
+                Loc loc = ctx->getBelLocation(ci->bel);
+                cell_locs[cell.first].x = loc.x;
+                cell_locs[cell.first].y = loc.y;
+                cell_locs[cell.first].locked = true;
+                cell_locs[cell.first].global = ctx->getBelGlobalBuf(ci->bel);
+
+            } else if (ci->constr_parent == nullptr) {
+                if (!available_bels.count(ci->type) || available_bels.at(ci->type).empty())
+                    log_error("Unable to place cell '%s', no Bels remaining of type '%s'\n", ci->name.c_str(ctx),
+                              ci->type.c_str(ctx));
+                BelId bel = available_bels.at(ci->type).back();
+                available_bels.at(ci->type).pop_back();
+                Loc loc = ctx->getBelLocation(bel);
+                cell_locs[cell.first].x = loc.x;
+                cell_locs[cell.first].y = loc.y;
+                cell_locs[cell.first].locked = false;
+                cell_locs[cell.first].global = ctx->getBelGlobalBuf(bel);
+                ci->udata = placed_cell_count++;
+                place_cells.push_back(ci);
+            }
+        }
+    }
+
+    // Update the location of all children of a chain
+    void update_chain(CellInfo *cell)
+    {
+        const auto &base = cell_locs[cell->name];
+        for (auto child : cell->constr_children) {
+            if (child->constr_x != child->UNCONSTR)
+                cell_locs[child->name].x = base.x + child->constr_x;
+            else
+                cell_locs[child->name].x = base.x; // better handling of UNCONSTR?
+            if (child->constr_y != child->UNCONSTR)
+                cell_locs[child->name].y = base.y + child->constr_y;
+            else
+                cell_locs[child->name].y = base.y; // better handling of UNCONSTR?
+            if (!child->constr_children.empty())
+                update_chain(child);
+        }
+    }
+
+    // Update all chains
+    void update_all_chains()
+    {
+        for (auto cell : place_cells) {
+            if (!cell->constr_children.empty())
+                update_chain(cell);
+        }
+    }
+
+    // Run a function on all ports of a net - including the driver and all users
+    template <typename Tf> void foreach_port(NetInfo *net, Tf func)
+    {
+        if (net->driver.cell != nullptr)
+            func(net->driver);
+        for (auto &user : net->users)
+            func(user);
+    }
+
+    // Build the system of equations for either X or Y
+    void build_equations(EquationSystem<double> &es, bool yaxis)
+    {
+        // Return the x or y position of a cell, depending on ydir
+        auto cell_pos = [&](CellInfo *cell) { return yaxis ? cell_locs.at(cell->name).y : cell_locs.at(cell->name).x; };
+
+        es.reset();
+
+        for (auto net : sorted(ctx->nets)) {
+            NetInfo *ni = net.second;
+            if (ni->driver.cell == nullptr)
+                continue;
+            if (ni->users.empty())
+                continue;
+            if (cell_locs.at(ni->driver.cell->name).global)
+                continue;
+            // Find the bounds of the net in this axis, and the ports that correspond to these bounds
+            PortRef *lbport = nullptr, *ubport = nullptr;
+            int lbpos = std::numeric_limits<int>::max(), ubpos = std::numeric_limits<int>::min();
+            foreach_port(ni, [&](PortRef &port) {
+                int pos = cell_pos(port.cell);
+                if (pos < lbpos) {
+                    lbpos = pos;
+                    lbport = &port;
+                }
+                if (pos > ubpos) {
+                    ubpos = pos;
+                    ubport = &port;
+                }
+            });
+            // Add all relevant connections to the matrix
+            foreach_port(ni, [&](PortRef &port) {
+                int this_pos = cell_pos(port.cell);
+                auto process_arc = [&](PortRef *other) {
+                    if (other == &port)
+                        return;
+                    int o_pos = cell_pos(other->cell);
+                    if (o_pos == this_pos)
+                        return; // FIXME: or clamp to 1?
+                    double weight = 1. / (ni->users.size() * std::abs(o_pos - this_pos));
+                    // FIXME: add criticality to weighting
+
+                    // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation,
+                    // if the other end isn't fixed
+                    if (!cell_locs.at(port.cell->name).locked) {
+                        es.add_coeff(port.cell->udata, port.cell->udata, weight);
+                        if (!cell_locs.at(other->cell->name).locked)
+                            es.add_coeff(other->cell->udata, port.cell->udata, -weight);
+                    } else {
+                        // Add our fixed position to the other end's RHS
+                        if (!cell_locs.at(other->cell->name).locked)
+                            es.add_rhs(other->cell->udata, this_pos * weight);
+                    }
+                    // Opposite for the other end of the connection
+                    if (!cell_locs.at(other->cell->name).locked) {
+                        es.add_coeff(other->cell->udata, other->cell->udata, weight);
+                        if (!cell_locs.at(port.cell->name).locked)
+                            es.add_coeff(port.cell->udata, other->cell->udata, -weight);
+                    } else {
+                        // Add our fixed position to the other end's RHS
+                        if (!cell_locs.at(port.cell->name).locked)
+                            es.add_rhs(port.cell->udata, this_pos * weight);
+                    }
+                };
+                process_arc(lbport);
+                process_arc(ubport);
+            });
+        }
+    }
+};
+
+NEXTPNR_NAMESPACE_END