From f36188f2e18783ea692f8b6132712a0d1da6b6b7 Mon Sep 17 00:00:00 2001 From: gatecat Date: Mon, 13 Dec 2021 16:04:45 +0000 Subject: [PATCH] ecp5: LUT permutation support Signed-off-by: gatecat --- .cirrus/Dockerfile.ubuntu20.04 | 2 +- ecp5/arch.cc | 4 +++ ecp5/arch.h | 50 +++++++++++++++++++++++++++-- ecp5/archdefs.h | 1 + ecp5/bitstream.cc | 58 ++++++++++++++++++++++++++++++++-- ecp5/main.cc | 3 ++ ecp5/pack.cc | 4 ++- ecp5/trellis_import.py | 4 ++- 8 files changed, 118 insertions(+), 8 deletions(-) diff --git a/.cirrus/Dockerfile.ubuntu20.04 b/.cirrus/Dockerfile.ubuntu20.04 index 2e39058d..4f229d2d 100644 --- a/.cirrus/Dockerfile.ubuntu20.04 +++ b/.cirrus/Dockerfile.ubuntu20.04 @@ -48,7 +48,7 @@ RUN set -e -x ;\ cd /usr/local/src ;\ git clone --recursive https://github.com/YosysHQ/prjtrellis.git ;\ cd prjtrellis ;\ - git reset --hard 210a0a72757d57b278ac7397ae6b14729f149b10 ;\ + git reset --hard 7239331d5463321d4864164f320beef67310f1e5 ;\ cd libtrellis ;\ cmake . ;\ make -j $(nproc) ;\ diff --git a/ecp5/arch.cc b/ecp5/arch.cc index ec64fb82..95a27682 100644 --- a/ecp5/arch.cc +++ b/ecp5/arch.cc @@ -144,6 +144,8 @@ Arch::Arch(ArchArgs args) : args(args) n_pips++; } pip2net.resize(n_pips, nullptr); + + lutperm_allowed.resize(chip_info->width * chip_info->height * 4); } // ----------------------------------------------------------------------- @@ -625,6 +627,8 @@ bool Arch::route() { std::string router = str_or_default(settings, id("router"), defaultRouter); + disable_router_lutperm = getCtx()->setting("arch.disable_router_lutperm", false); + setup_wire_locations(); route_ecp5_globals(getCtx()); assignArchInfo(); diff --git a/ecp5/arch.h b/ecp5/arch.h index e7bf64fe..51a919bb 100644 --- a/ecp5/arch.h +++ b/ecp5/arch.h @@ -58,8 +58,15 @@ NPNR_PACKED_STRUCT(struct PipInfoPOD { int16_t timing_class; int8_t tile_type; int8_t pip_type; + int16_t lutperm_flags; + int16_t padding; }); +inline bool is_lutperm_pip(int16_t flags) { return flags & 0x4000; } +inline uint8_t lutperm_lut(int16_t flags) { return (flags >> 4) & 0x7; } +inline uint8_t lutperm_out(int16_t flags) { return (flags >> 2) & 0x3; } +inline uint8_t lutperm_in(int16_t flags) { return flags & 0x3; } + NPNR_PACKED_STRUCT(struct PipLocatorPOD { LocationPOD rel_loc; int32_t index; @@ -446,6 +453,14 @@ struct Arch : BaseArch mutable dict pip_by_name; std::vector bel_to_cell; + enum class LutPermRule + { + NONE, + CARRY, + ALL, + }; + std::vector lutperm_allowed; + bool disable_router_lutperm = false; // faster replacements for base_pip2net, base_wire2net // indexed by get_pip_vecidx() @@ -509,6 +524,12 @@ struct Arch : BaseArch return (bel.location.y * chip_info->width + bel.location.x) * max_loc_bels + bel.index; } + int get_slice_index(int x, int y, int slice) const + { + NPNR_ASSERT(slice >= 0 && slice < 4); + return (y * chip_info->width + x) * 4 + slice; + } + void bindBel(BelId bel, CellInfo *cell, PlaceStrength strength) override { NPNR_ASSERT(bel != BelId()); @@ -517,6 +538,11 @@ struct Arch : BaseArch bel_to_cell[idx] = cell; cell->bel = bel; cell->belStrength = strength; + if (getBelType(bel) == id_TRELLIS_SLICE) { + lutperm_allowed.at(get_slice_index(bel.location.x, bel.location.y, getBelLocation(bel).z)) = + (cell->sliceInfo.is_memory ? LutPermRule::NONE + : (cell->sliceInfo.is_carry ? LutPermRule::CARRY : LutPermRule::ALL)); + } refreshUiBel(bel); } @@ -744,11 +770,31 @@ struct Arch : BaseArch p2n_entry->wires.erase(dst); p2n_entry = nullptr; } - bool checkPipAvail(PipId pip) const override { return getBoundPipNet(pip) == nullptr; } + bool is_pip_blocked(PipId pip) const + { + auto &pip_data = loc_info(pip)->pip_data[pip.index]; + int lp = pip_data.lutperm_flags; + if (is_lutperm_pip(lp)) { + if (disable_router_lutperm) + return true; + auto rule = lutperm_allowed.at(get_slice_index(pip.location.x, pip.location.y, lutperm_lut(lp) / 2)); + if (rule == LutPermRule::NONE) { + // Permutation not allowed + return true; + } else if (rule == LutPermRule::CARRY) { + // Can swap A/B and C/D only + int i = lutperm_out(lp), j = lutperm_in(lp); + if ((i / 2) != (j / 2)) + return true; + } + } + return false; + } + bool checkPipAvail(PipId pip) const override { return (getBoundPipNet(pip) == nullptr) && !is_pip_blocked(pip); } bool checkPipAvailForNet(PipId pip, NetInfo *net) const override { NetInfo *bound_net = getBoundPipNet(pip); - return bound_net == nullptr || bound_net == net; + return (bound_net == nullptr || bound_net == net) && !is_pip_blocked(pip); } NetInfo *getBoundPipNet(PipId pip) const override { return pip2net.at(get_pip_vecidx(pip)); } diff --git a/ecp5/archdefs.h b/ecp5/archdefs.h index 80e7810c..dd260a3e 100644 --- a/ecp5/archdefs.h +++ b/ecp5/archdefs.h @@ -165,6 +165,7 @@ struct ArchCellInfo : BaseClusterInfo bool using_dff; bool has_l6mux; bool is_carry; + bool is_memory; IdString clk_sig, lsr_sig, clkmux, lsrmux, srmode; int sd0, sd1; } sliceInfo; diff --git a/ecp5/bitstream.cc b/ecp5/bitstream.cc index 338c4f20..de6c711e 100644 --- a/ecp5/bitstream.cc +++ b/ecp5/bitstream.cc @@ -522,6 +522,55 @@ static void set_pip(Context *ctx, ChipConfig &cc, PipId pip) cc.tiles[tile].add_arc(sink, source); } +static unsigned permute_lut(Context *ctx, CellInfo *cell, pool &used_phys_pins, int k, unsigned orig_init) +{ + std::array, 4> phys_to_log; + const std::array ports{k ? id_A1 : id_A0, k ? id_B1 : id_B0, k ? id_C1 : id_C0, k ? id_D1 : id_D0}; + for (unsigned i = 0; i < 4; i++) { + WireId pin_wire = ctx->getBelPinWire(cell->bel, ports[i]); + for (PipId pip : ctx->getPipsUphill(pin_wire)) { + if (!ctx->getBoundPipNet(pip)) + continue; + unsigned lp = ctx->loc_info(pip)->pip_data[pip.index].lutperm_flags; + if (!is_lutperm_pip(lp)) { // non-permuting + phys_to_log[i].push_back(i); + } else { // permuting + unsigned from_pin = lutperm_in(lp); + unsigned to_pin = lutperm_out(lp); + NPNR_ASSERT(to_pin == i); + phys_to_log[from_pin].push_back(i); + } + } + } + for (unsigned i = 0; i < 4; i++) + if (!phys_to_log.at(i).empty()) + used_phys_pins.insert(ports.at(i)); + if (cell->sliceInfo.is_carry) { + // Insert dummy entries to ensure we keep the split between the two halves of a CCU2 + for (unsigned i = 0; i < 4; i++) { + if (!phys_to_log.at(i).empty()) + continue; + for (unsigned j = 2 * (i / 2); j < 2 * ((i / 2) + 1); j++) { + if (!ctx->getBoundWireNet(ctx->getBelPinWire(cell->bel, ports[j]))) + phys_to_log.at(i).push_back(j); + } + } + } + unsigned permuted_init = 0; + for (unsigned i = 0; i < 16; i++) { + unsigned log_idx = 0; + for (unsigned j = 0; j < 4; j++) { + if ((i >> j) & 0x1) { + for (auto log_pin : phys_to_log[j]) + log_idx |= (1 << log_pin); + } + } + if ((orig_init >> log_idx) & 0x1) + permuted_init |= (1 << i); + } + return permuted_init; +} + static std::vector parse_config_str(const Property &p, int length) { std::vector word; @@ -787,12 +836,15 @@ void write_bitstream(Context *ctx, std::string base_config_file, std::string tex } BelId bel = ci->bel; if (ci->type == ctx->id("TRELLIS_SLICE")) { + pool used_phys_pins; std::string tname = ctx->get_tile_by_type_loc(bel.location.y, bel.location.x, "PLC2"); std::string slice = ctx->loc_info(bel)->bel_data[bel.index].name.get(); int lut0_init = int_or_default(ci->params, ctx->id("LUT0_INITVAL")); int lut1_init = int_or_default(ci->params, ctx->id("LUT1_INITVAL")); - cc.tiles[tname].add_word(slice + ".K0.INIT", int_to_bitvector(lut0_init, 16)); - cc.tiles[tname].add_word(slice + ".K1.INIT", int_to_bitvector(lut1_init, 16)); + cc.tiles[tname].add_word(slice + ".K0.INIT", + int_to_bitvector(permute_lut(ctx, ci, used_phys_pins, 0, lut0_init), 16)); + cc.tiles[tname].add_word(slice + ".K1.INIT", + int_to_bitvector(permute_lut(ctx, ci, used_phys_pins, 1, lut1_init), 16)); cc.tiles[tname].add_enum(slice + ".MODE", str_or_default(ci->params, ctx->id("MODE"), "LOGIC")); cc.tiles[tname].add_enum(slice + ".GSR", str_or_default(ci->params, ctx->id("GSR"), "ENABLED")); cc.tiles[tname].add_enum(slice + ".REG0.SD", intstr_or_default(ci->params, ctx->id("REG0_SD"), "0")); @@ -854,7 +906,7 @@ void write_bitstream(Context *ctx, std::string base_config_file, std::string tex // Tie unused inputs high for (auto input : {id_A0, id_B0, id_C0, id_D0, id_A1, id_B1, id_C1, id_D1}) { - if (ci->ports.find(input) == ci->ports.end() || ci->ports.at(input).net == nullptr) { + if (!used_phys_pins.count(input)) { cc.tiles[tname].add_enum(slice + "." + input.str(ctx) + "MUX", "1"); } } diff --git a/ecp5/main.cc b/ecp5/main.cc index f3861149..1864548b 100644 --- a/ecp5/main.cc +++ b/ecp5/main.cc @@ -85,6 +85,7 @@ po::options_description ECP5CommandHandler::getArchOptions() specific.add_options()( "out-of-context", "disable IO buffer insertion and global promotion/routing, for building pre-routed blocks (experimental)"); + specific.add_options()("disable-router-lutperm", "don't allow the router to permute LUT inputs"); return specific; } @@ -255,6 +256,8 @@ std::unique_ptr ECP5CommandHandler::createContext(dictsettings[ctx->id("arch.speed")] = speedString(ctx->archArgs().speed); if (vm.count("out-of-context")) ctx->settings[ctx->id("arch.ooc")] = 1; + if (vm.count("disable-router-lutperm")) + ctx->settings[ctx->id("arch.disable_router_lutperm")] = 1; return ctx; } diff --git a/ecp5/pack.cc b/ecp5/pack.cc index cbf882a8..85d92336 100644 --- a/ecp5/pack.cc +++ b/ecp5/pack.cc @@ -3253,7 +3253,9 @@ void Arch::assignArchInfo() ci->sliceInfo.clkmux = id(str_or_default(ci->params, id_CLKMUX, "CLK")); ci->sliceInfo.lsrmux = id(str_or_default(ci->params, id_LSRMUX, "LSR")); ci->sliceInfo.srmode = id(str_or_default(ci->params, id_SRMODE, "LSR_OVER_CE")); - ci->sliceInfo.is_carry = str_or_default(ci->params, id("MODE"), "LOGIC") == "CCU2"; + std::string mode = str_or_default(ci->params, id("MODE"), "LOGIC"); + ci->sliceInfo.is_carry = (mode == "CCU2"); + ci->sliceInfo.is_memory = (mode == "DPRAM" || mode == "RAMW"); ci->sliceInfo.sd0 = std::stoi(str_or_default(ci->params, id("REG0_SD"), "0")); ci->sliceInfo.sd1 = std::stoi(str_or_default(ci->params, id("REG1_SD"), "0")); ci->sliceInfo.has_l6mux = false; diff --git a/ecp5/trellis_import.py b/ecp5/trellis_import.py index 2e76fb74..a586db7b 100755 --- a/ecp5/trellis_import.py +++ b/ecp5/trellis_import.py @@ -426,6 +426,8 @@ def write_database(dev_name, chip, ddrg, endianness): if cls == 1 and "PCS" in snk_name or "DCU" in snk_name or "DCU" in src_name: cls = 2 bba.u8(cls, "pip_type") + bba.u16(arc.lutperm_flags, "lutperm_flags") + bba.u16(0, "padding") if len(loctype.wires) > 0: for wire_idx in range(len(loctype.wires)): wire = loctype.wires[wire_idx] @@ -623,7 +625,7 @@ def main(): # print("Initialising chip...") chip = pytrellis.Chip(dev_names[args.device]) # print("Building routing graph...") - ddrg = pytrellis.make_dedup_chipdb(chip) + ddrg = pytrellis.make_dedup_chipdb(chip, include_lutperm_pips=True) max_row = chip.get_max_row() max_col = chip.get_max_col() process_timing_data()