mistral: Account for TD input count limit

Signed-off-by: gatecat <gatecat@ds0.me>
2021-05-14 18:26:51 +01:00 · 2021-05-14 18:26:51 +01:00 · b2f45b1aab
commit b2f45b1aab
parent bd525d3548
4 changed files with 128 additions and 9 deletions
--- a/mistral/arch.cc
+++ b/mistral/arch.cc
@ -149,15 +149,23 @@ IdStringList Arch::getBelName(BelId bel) const
 bool Arch::isBelLocationValid(BelId bel) const
 {
    auto &data = bel_data(bel);
-    // Incremental validity update
    if (data.type == id_MISTRAL_COMB) {
-        return is_alm_legal(data.lab_data.lab, data.lab_data.alm);
+        return is_alm_legal(data.lab_data.lab, data.lab_data.alm) && check_lab_input_count(data.lab_data.lab);
    } else if (data.type == id_MISTRAL_FF) {
-        return is_alm_legal(data.lab_data.lab, data.lab_data.alm) && is_lab_ctrlset_legal(data.lab_data.lab);
+        return is_alm_legal(data.lab_data.lab, data.lab_data.alm) && check_lab_input_count(data.lab_data.lab) &&
+               is_lab_ctrlset_legal(data.lab_data.lab);
    }
    return true;
 }

+void Arch::update_bel(BelId bel)
+{
+    auto &data = bel_data(bel);
+    if (data.type == id_MISTRAL_COMB || data.type == id_MISTRAL_FF) {
+        update_alm_input_count(data.lab_data.lab, data.lab_data.alm);
+    }
+}
+
 WireId Arch::getWireByName(IdStringList name) const
 {
    // non-mistral wires
--- a/mistral/arch.h
+++ b/mistral/arch.h
@ -49,6 +49,12 @@ struct ALMInfo
    std::array<BelId, 4> ff_bels;

    bool l6_mode = false;
+
+    // Which CLK/ENA and ACLR is chosen for each half
+    std::array<int, 2> clk_ena_idx, aclr_idx;
+
+    // For keeping track of how many inputs are currently being used, for the LAB routeability check
+    int unique_input_count = 0;
 };

 struct LABInfo
@ -325,6 +331,18 @@ struct Arch : BaseArch<ArchRanges>

    bool isBelLocationValid(BelId bel) const override;

+    void bindBel(BelId bel, CellInfo *cell, PlaceStrength strength) override
+    {
+        BaseArch::bindBel(bel, cell, strength);
+        update_bel(bel);
+    }
+    void unbindBel(BelId bel) override
+    {
+        BaseArch::unbindBel(bel);
+        update_bel(bel);
+    }
+
+    void update_bel(BelId bel);
    BelId bel_by_block_idx(int x, int y, IdString type, int block_index) const;

    // -------------------------------------------------
@ -428,6 +446,7 @@ struct Arch : BaseArch<ArchRanges>
    bool is_comb_cell(IdString cell_type) const;        // lab.cc
    bool is_alm_legal(uint32_t lab, uint8_t alm) const; // lab.cc
    bool is_lab_ctrlset_legal(uint32_t lab) const;      // lab.cc
+    bool check_lab_input_count(uint32_t lab) const;     // lab.cc

    void assign_comb_info(CellInfo *cell) const; // lab.cc
    void assign_ff_info(CellInfo *cell) const;   // lab.cc
@ -435,6 +454,7 @@ struct Arch : BaseArch<ArchRanges>
    void lab_pre_route();                                   // lab.cc
    void assign_control_sets(uint32_t lab);                 // lab.cc
    void reassign_alm_inputs(uint32_t lab, uint8_t alm);    // lab.cc
+    void update_alm_input_count(uint32_t lab, uint8_t alm); // lab.cc

    uint64_t compute_lut_mask(uint32_t lab, uint8_t alm); // lab.cc

--- a/mistral/archdefs.h
+++ b/mistral/archdefs.h
@ -182,6 +182,13 @@ struct ArchCellInfo : BaseClusterInfo
            int used_lut_input_count; // excluding those null/constant
            int lut_bits_count;

+            // for the LAB routeability check (see the detailed description in lab.cc); usually the same signal feeding
+            // multiple ALMs in a LAB is counted multiple times, due to not knowing which routing resources it will need
+            // in each case. But carry chains where we know how things will pack are allowed to share across ALMs as a
+            // special case, primarily to support adders/subtractors with a 'B invert' control signal shared across all
+            // ALMs.
+            int chain_shared_input_count;
+
            bool is_carry, is_shared, is_extended;
            bool carry_start, carry_end;
        } combInfo;
--- a/mistral/lab.cc
+++ b/mistral/lab.cc
@ -233,16 +233,21 @@ void Arch::assign_comb_info(CellInfo *cell) const
    cell->combInfo.is_extended = false;
    cell->combInfo.carry_start = false;
    cell->combInfo.carry_end = false;
+    cell->combInfo.chain_shared_input_count = 0;

    if (cell->type == id_MISTRAL_ALUT_ARITH) {
        cell->combInfo.is_carry = true;
        cell->combInfo.lut_input_count = 5;
        cell->combInfo.lut_bits_count = 32;
+
        // This is a special case in terms of naming
+        const std::array<IdString, 5> arith_pins{id_A, id_B, id_C, id_D0, id_D1};
+        {
            int i = 0;
-        for (auto pin : {id_A, id_B, id_C, id_D0, id_D1}) {
+            for (auto pin : arith_pins) {
                cell->combInfo.lut_in[i++] = get_net_or_empty(cell, pin);
            }
+        }

        const NetInfo *ci = get_net_or_empty(cell, id_CI);
        const NetInfo *co = get_net_or_empty(cell, id_CO);
@ -250,6 +255,22 @@ void Arch::assign_comb_info(CellInfo *cell) const
        cell->combInfo.comb_out = get_net_or_empty(cell, id_SO);
        cell->combInfo.carry_start = (ci == nullptr) || (ci->driver.cell == nullptr);
        cell->combInfo.carry_end = (co == nullptr) || (co->users.empty());
+
+        // Compute cross-ALM routing sharing - only check the z=0 case inside ALMs (even, non-zero constr_z)
+        if (cell->constr_z > 0 && ((cell->constr_z % 2) == 0) && ci) {
+            const CellInfo *prev = ci->driver.cell;
+            if (prev != nullptr) {
+                for (int i = 0; i < 5; i++) {
+                    const NetInfo *a = get_net_or_empty(cell, arith_pins[i]);
+                    if (a == nullptr)
+                        continue;
+                    const NetInfo *b = get_net_or_empty(prev, arith_pins[i]);
+                    if (a == b)
+                        ++cell->combInfo.chain_shared_input_count;
+                }
+            }
+        }
+
    } else {
        cell->combInfo.lut_input_count = 0;
        switch (cell->type.index) {
@ -402,6 +423,69 @@ bool Arch::is_alm_legal(uint32_t lab, uint8_t alm) const
    return true;
 }

+void Arch::update_alm_input_count(uint32_t lab, uint8_t alm)
+{
+    // TODO: duplication with is_alm_legal above
+    auto &alm_data = labs.at(lab).alms.at(alm);
+    // Get cells into an array for fast access
+    std::array<const CellInfo *, 2> luts{getBoundBelCell(alm_data.lut_bels[0]), getBoundBelCell(alm_data.lut_bels[1])};
+    std::array<const CellInfo *, 4> ffs{getBoundBelCell(alm_data.ff_bels[0]), getBoundBelCell(alm_data.ff_bels[1]),
+                                        getBoundBelCell(alm_data.ff_bels[2]), getBoundBelCell(alm_data.ff_bels[3])};
+    int total_inputs = 0;
+    int total_lut_inputs = 0;
+    for (int i = 0; i < 2; i++) {
+        if (!luts[i])
+            continue;
+        total_lut_inputs += luts[i]->combInfo.used_lut_input_count - luts[i]->combInfo.chain_shared_input_count;
+    }
+    int shared_lut_inputs = 0;
+    if (luts[0] && luts[1]) {
+        for (int i = 0; i < luts[1]->combInfo.lut_input_count; i++) {
+            const NetInfo *sig = luts[1]->combInfo.lut_in[i];
+            if (!sig)
+                continue;
+            for (int j = 0; j < luts[0]->combInfo.lut_input_count; j++) {
+                if (sig == luts[0]->combInfo.lut_in[j]) {
+                    ++shared_lut_inputs;
+                    break;
+                }
+            }
+            if (shared_lut_inputs >= 2) {
+                // only 2 inputs have guaranteed sharing, without routeability based LUT permutation at least
+                break;
+            }
+        }
+    }
+    total_inputs = std::max(0, total_lut_inputs - shared_lut_inputs);
+    for (int i = 0; i < 4; i++) {
+        const CellInfo *ff = ffs[i];
+        if (!ff)
+            continue;
+        if (ff->ffInfo.sdata)
+            ++total_inputs;
+        // FF input doesn't consume routing resources if driven by associated LUT
+        if (ff->ffInfo.datain && (!luts[i / 2] || ff->ffInfo.datain != luts[i / 2]->combInfo.comb_out))
+            ++total_inputs;
+    }
+    alm_data.unique_input_count = total_inputs;
+}
+
+bool Arch::check_lab_input_count(uint32_t lab) const
+{
+    // There are only 46 TD signals available to route signals from general routing to the ALM input. Currently, we
+    // check the total sum of ALM inputs is at most 42 (46 minus 4 FF control inputs). This is a conservative check for
+    // several reasons, because LD signals are also available for feedback routing from ALM output to input, and because
+    // TD signals may be shared if the same net routes to multiple ALMs. But these cases will need careful handling and
+    // LUT permutation during routing to be useful; and in any event conservative LAB packing will help nextpnr's
+    // currently perfunctory place and route algorithms to achieve satisfactory runtimes.
+    int count = 0;
+    auto &lab_data = labs.at(lab);
+    for (int i = 0; i < 10; i++) {
+        count += lab_data.alms.at(i).unique_input_count;
+    }
+    return (count <= 42);
+}
+
 namespace {
 bool check_assign_sig(ControlSig &sig_set, const ControlSig &sig)
 {