From b2f45b1aab596d4185f3a6c6a60d6b97c6aa4ad0 Mon Sep 17 00:00:00 2001 From: gatecat Date: Fri, 14 May 2021 18:26:51 +0100 Subject: [PATCH] mistral: Account for TD input count limit Signed-off-by: gatecat --- mistral/arch.cc | 14 ++++++-- mistral/arch.h | 26 ++++++++++++-- mistral/archdefs.h | 7 ++++ mistral/lab.cc | 90 ++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 128 insertions(+), 9 deletions(-) diff --git a/mistral/arch.cc b/mistral/arch.cc index d5bc424c..9c661439 100644 --- a/mistral/arch.cc +++ b/mistral/arch.cc @@ -149,15 +149,23 @@ IdStringList Arch::getBelName(BelId bel) const bool Arch::isBelLocationValid(BelId bel) const { auto &data = bel_data(bel); - // Incremental validity update if (data.type == id_MISTRAL_COMB) { - return is_alm_legal(data.lab_data.lab, data.lab_data.alm); + return is_alm_legal(data.lab_data.lab, data.lab_data.alm) && check_lab_input_count(data.lab_data.lab); } else if (data.type == id_MISTRAL_FF) { - return is_alm_legal(data.lab_data.lab, data.lab_data.alm) && is_lab_ctrlset_legal(data.lab_data.lab); + return is_alm_legal(data.lab_data.lab, data.lab_data.alm) && check_lab_input_count(data.lab_data.lab) && + is_lab_ctrlset_legal(data.lab_data.lab); } return true; } +void Arch::update_bel(BelId bel) +{ + auto &data = bel_data(bel); + if (data.type == id_MISTRAL_COMB || data.type == id_MISTRAL_FF) { + update_alm_input_count(data.lab_data.lab, data.lab_data.alm); + } +} + WireId Arch::getWireByName(IdStringList name) const { // non-mistral wires diff --git a/mistral/arch.h b/mistral/arch.h index 4e603177..97d6a2b1 100644 --- a/mistral/arch.h +++ b/mistral/arch.h @@ -49,6 +49,12 @@ struct ALMInfo std::array ff_bels; bool l6_mode = false; + + // Which CLK/ENA and ACLR is chosen for each half + std::array clk_ena_idx, aclr_idx; + + // For keeping track of how many inputs are currently being used, for the LAB routeability check + int unique_input_count = 0; }; struct LABInfo @@ -325,6 +331,18 @@ struct Arch : BaseArch 
bool isBelLocationValid(BelId bel) const override; + void bindBel(BelId bel, CellInfo *cell, PlaceStrength strength) override + { + BaseArch::bindBel(bel, cell, strength); + update_bel(bel); + } + void unbindBel(BelId bel) override + { + BaseArch::unbindBel(bel); + update_bel(bel); + } + + void update_bel(BelId bel); BelId bel_by_block_idx(int x, int y, IdString type, int block_index) const; // ------------------------------------------------- @@ -428,13 +446,15 @@ struct Arch : BaseArch bool is_comb_cell(IdString cell_type) const; // lab.cc bool is_alm_legal(uint32_t lab, uint8_t alm) const; // lab.cc bool is_lab_ctrlset_legal(uint32_t lab) const; // lab.cc + bool check_lab_input_count(uint32_t lab) const; // lab.cc void assign_comb_info(CellInfo *cell) const; // lab.cc void assign_ff_info(CellInfo *cell) const; // lab.cc - void lab_pre_route(); // lab.cc - void assign_control_sets(uint32_t lab); // lab.cc - void reassign_alm_inputs(uint32_t lab, uint8_t alm); // lab.cc + void lab_pre_route(); // lab.cc + void assign_control_sets(uint32_t lab); // lab.cc + void reassign_alm_inputs(uint32_t lab, uint8_t alm); // lab.cc + void update_alm_input_count(uint32_t lab, uint8_t alm); // lab.cc uint64_t compute_lut_mask(uint32_t lab, uint8_t alm); // lab.cc diff --git a/mistral/archdefs.h b/mistral/archdefs.h index 35b5bf63..181bc618 100644 --- a/mistral/archdefs.h +++ b/mistral/archdefs.h @@ -182,6 +182,13 @@ struct ArchCellInfo : BaseClusterInfo int used_lut_input_count; // excluding those null/constant int lut_bits_count; + // for the LAB routeability check (see the detailed description in lab.cc); usually the same signal feeding + // multiple ALMs in a LAB is counted multiple times, due to not knowing which routing resources it will need + // in each case. But carry chains where we know how things will pack are allowed to share across ALMs as a + // special case, primarily to support adders/subtractors with a 'B invert' control signal shared across all + // ALMs. 
+ int chain_shared_input_count; + bool is_carry, is_shared, is_extended; bool carry_start, carry_end; } combInfo; diff --git a/mistral/lab.cc b/mistral/lab.cc index 2d57ed42..3e6292b3 100644 --- a/mistral/lab.cc +++ b/mistral/lab.cc @@ -233,15 +233,20 @@ void Arch::assign_comb_info(CellInfo *cell) const cell->combInfo.is_extended = false; cell->combInfo.carry_start = false; cell->combInfo.carry_end = false; + cell->combInfo.chain_shared_input_count = 0; if (cell->type == id_MISTRAL_ALUT_ARITH) { cell->combInfo.is_carry = true; cell->combInfo.lut_input_count = 5; cell->combInfo.lut_bits_count = 32; + // This is a special case in terms of naming - int i = 0; - for (auto pin : {id_A, id_B, id_C, id_D0, id_D1}) { - cell->combInfo.lut_in[i++] = get_net_or_empty(cell, pin); + const std::array arith_pins{id_A, id_B, id_C, id_D0, id_D1}; + { + int i = 0; + for (auto pin : arith_pins) { + cell->combInfo.lut_in[i++] = get_net_or_empty(cell, pin); + } } const NetInfo *ci = get_net_or_empty(cell, id_CI); @@ -250,6 +255,22 @@ void Arch::assign_comb_info(CellInfo *cell) const cell->combInfo.comb_out = get_net_or_empty(cell, id_SO); cell->combInfo.carry_start = (ci == nullptr) || (ci->driver.cell == nullptr); cell->combInfo.carry_end = (co == nullptr) || (co->users.empty()); + + // Compute cross-ALM routing sharing - only check the z=0 case inside ALMs + if (cell->constr_z > 0 && ((cell->constr_z % 2) == 0) && ci) { + const CellInfo *prev = ci->driver.cell; + if (prev != nullptr) { + for (int i = 0; i < 5; i++) { + const NetInfo *a = get_net_or_empty(cell, arith_pins[i]); + if (a == nullptr) + continue; + const NetInfo *b = get_net_or_empty(prev, arith_pins[i]); + if (a == b) + ++cell->combInfo.chain_shared_input_count; + } + } + } + } else { cell->combInfo.lut_input_count = 0; switch (cell->type.index) { @@ -402,6 +423,69 @@ bool Arch::is_alm_legal(uint32_t lab, uint8_t alm) const return true; } +void Arch::update_alm_input_count(uint32_t lab, uint8_t alm) +{ + // TODO: 
duplication with above + auto &alm_data = labs.at(lab).alms.at(alm); + // Get cells into an array for fast access + std::array luts{getBoundBelCell(alm_data.lut_bels[0]), getBoundBelCell(alm_data.lut_bels[1])}; + std::array ffs{getBoundBelCell(alm_data.ff_bels[0]), getBoundBelCell(alm_data.ff_bels[1]), + getBoundBelCell(alm_data.ff_bels[2]), getBoundBelCell(alm_data.ff_bels[3])}; + int total_inputs = 0; + int total_lut_inputs = 0; + for (int i = 0; i < 2; i++) { + if (!luts[i]) + continue; + total_lut_inputs += luts[i]->combInfo.used_lut_input_count - luts[i]->combInfo.chain_shared_input_count; + } + int shared_lut_inputs = 0; + if (luts[0] && luts[1]) { + for (int i = 0; i < luts[1]->combInfo.lut_input_count; i++) { + const NetInfo *sig = luts[1]->combInfo.lut_in[i]; + if (!sig) + continue; + for (int j = 0; j < luts[0]->combInfo.lut_input_count; j++) { + if (sig == luts[0]->combInfo.lut_in[j]) { + ++shared_lut_inputs; + break; + } + } + if (shared_lut_inputs >= 2) { + // only 2 inputs have guaranteed sharing, without routeability based LUT permutation at least + break; + } + } + } + total_inputs = std::max(0, total_lut_inputs - shared_lut_inputs); + for (int i = 0; i < 4; i++) { + const CellInfo *ff = ffs[i]; + if (!ff) + continue; + if (ff->ffInfo.sdata) + ++total_inputs; + // FF input doesn't consume routing resources if driven by associated LUT + if (ff->ffInfo.datain && (!luts[i / 2] || ff->ffInfo.datain != luts[i / 2]->combInfo.comb_out)) + ++total_inputs; + } + alm_data.unique_input_count = total_inputs; +} + +bool Arch::check_lab_input_count(uint32_t lab) const +{ + // There are only 46 TD signals available to route signals from general routing to the ALM input. Currently, we + // check the total sum of ALM inputs is at most 42; 46 minus 4 FF control inputs. 
This is a conservative check for + // several reasons, because LD signals are also available for feedback routing from ALM output to input, and because + // TD signals may be shared if the same net routes to multiple ALMs. But these cases will need careful handling and + // LUT permutation during routing to be useful; and in any event conservative LAB packing will help nextpnr's + // currently perfunctory place and route algorithms to achieve satisfactory runtimes. + int count = 0; + auto &lab_data = labs.at(lab); + for (int i = 0; i < 10; i++) { + count += lab_data.alms.at(i).unique_input_count; + } + return (count <= 42); +} + namespace { bool check_assign_sig(ControlSig &sig_set, const ControlSig &sig) {