From c82654d003a81a778f88e67e303dd47185dd44cb Mon Sep 17 00:00:00 2001
From: YRabbit
Date: Sun, 2 Jul 2023 16:09:39 +1000
Subject: [PATCH] gowin: Himbaechel. Add wide LUTs

- MUX2_LUT5, MUX2_LUT6, MUX2_LUT7 and MUX2_LUT8 support;
- storing a common class of tiles in extra_data;
- misc fixes.

Signed-off-by: YRabbit
---
 himbaechel/uarch/gowin/gowin.cc          | 127 +++++++++++++++++++----
 himbaechel/uarch/gowin/gowin.h           |  18 ++++
 himbaechel/uarch/gowin/gowin_arch_gen.py |  80 ++++++++++++--
 3 files changed, 196 insertions(+), 29 deletions(-)

diff --git a/himbaechel/uarch/gowin/gowin.cc b/himbaechel/uarch/gowin/gowin.cc
index 3c7ec55c..0489e0c4 100644
--- a/himbaechel/uarch/gowin/gowin.cc
+++ b/himbaechel/uarch/gowin/gowin.cc
@@ -19,17 +19,7 @@ void GowinImpl::init(Context *ctx)
     // Arch::archArgsToId
 }
 
-void GowinImpl::prePlace()
-{
-    ctx->cells.at(ctx->id("leds_OBUF_O"))->setAttr(ctx->id("BEL"), std::string("X0Y14/IOBA"));
-    ctx->cells.at(ctx->id("leds_OBUF_O_1"))->setAttr(ctx->id("BEL"), std::string("X0Y15/IOBB"));
-    ctx->cells.at(ctx->id("leds_OBUF_O_2"))->setAttr(ctx->id("BEL"), std::string("X0Y20/IOBB"));
-    ctx->cells.at(ctx->id("leds_OBUF_O_3"))->setAttr(ctx->id("BEL"), std::string("X0Y21/IOBB"));
-    ctx->cells.at(ctx->id("leds_OBUF_O_4"))->setAttr(ctx->id("BEL"), std::string("X0Y24/IOBB"));
-    ctx->cells.at(ctx->id("leds_OBUF_O_5"))->setAttr(ctx->id("BEL"), std::string("X0Y25/IOBB"));
-    ctx->cells.at(ctx->id("rst_IBUF_I"))->setAttr(ctx->id("BEL"), std::string("X0Y4/IOBA"));
-    assign_cell_info();
-}
+void GowinImpl::prePlace() { assign_cell_info(); }
 
 void GowinImpl::pack()
 {
@@ -47,15 +37,17 @@ void GowinImpl::pack()
     // disconnect the constant LUT inputs
     mod_lut_inputs();
 
+    pack_wideluts();
+
     // Constrain directly connected LUTs and FFs together to use dedicated resources
-    int lutffs = h.constrain_cell_pairs(
-            pool<CellTypePort>{{id_LUT1, id_F}, {id_LUT2, id_F}, {id_LUT3, id_F}, {id_LUT4, id_F}},
-            pool<CellTypePort>{{id_DFF, id_D}, {id_DFFE, id_D}, {id_DFFN, id_D}, {id_DFFNE, id_D},
-                               {id_DFFS, id_D}, {id_DFFSE, id_D}, {id_DFFNS, id_D}, {id_DFFNSE, id_D},
-                               {id_DFFR, id_D}, {id_DFFRE, id_D}, {id_DFFNR, id_D}, {id_DFFNRE, id_D},
-                               {id_DFFP, id_D}, {id_DFFPE, id_D}, {id_DFFNP, id_D}, {id_DFFNPE, id_D},
-                               {id_DFFC, id_D}, {id_DFFCE, id_D}, {id_DFFNC, id_D}, {id_DFFNCE, id_D}},
-            1);
+    auto lut_outs = pool<CellTypePort>{{id_LUT1, id_F}, {id_LUT2, id_F}, {id_LUT3, id_F}, {id_LUT4, id_F}};
+    auto dff_ins = pool<CellTypePort>{{id_DFF, id_D}, {id_DFFE, id_D}, {id_DFFN, id_D}, {id_DFFNE, id_D},
+                                      {id_DFFS, id_D}, {id_DFFSE, id_D}, {id_DFFNS, id_D}, {id_DFFNSE, id_D},
+                                      {id_DFFR, id_D}, {id_DFFRE, id_D}, {id_DFFNR, id_D}, {id_DFFNRE, id_D},
+                                      {id_DFFP, id_D}, {id_DFFPE, id_D}, {id_DFFNP, id_D}, {id_DFFNPE, id_D},
+                                      {id_DFFC, id_D}, {id_DFFCE, id_D}, {id_DFFNC, id_D}, {id_DFFNCE, id_D}};
+
+    int lutffs = h.constrain_cell_pairs(lut_outs, dff_ins, 1);
 
     log_info("Constrained %d LUTFF pairs.\n", lutffs);
 }
@@ -237,4 +229,101 @@ void GowinImpl::mod_lut_inputs(void)
     }
 }
 
+// make cluster from LUTs and MUXes
+void GowinImpl::pack_wideluts(void)
+{
+    // children's offsets (dx, dz) relative to the parent; rows: MUX2_LUT8, MUX2_LUT7, MUX2_LUT6, MUX2_LUT5
+    struct _children
+    {
+        IdString port;
+        int dx, dz;
+    } mux_inputs[4][2] = {{{id_I0, 1, -7}, {id_I1, 0, -7}},
+                          {{id_I0, 0, 4}, {id_I1, 0, -4}},
+                          {{id_I0, 0, 2}, {id_I1, 0, -2}},
+                          {{id_I0, 0, -BelZ::MUX20_Z}, {id_I1, 0, 2 - BelZ::MUX20_Z}}};
+    typedef std::function<void(CellInfo &, CellInfo *, int, int)> recurse_func_t;
+    recurse_func_t make_cluster = [&, this](CellInfo &ci_root, CellInfo *ci_cursor, int dx, int dz) {
+        _children *inputs;
+        if (is_lut(ci_cursor)) {
+            return;
+        }
+        switch (ci_cursor->type.hash()) {
+        case ID_MUX2_LUT8:
+            inputs = mux_inputs[0];
+            break;
+        case ID_MUX2_LUT7:
+            inputs = mux_inputs[1];
+            break;
+        case ID_MUX2_LUT6:
+            inputs = mux_inputs[2];
+            break;
+        case ID_MUX2_LUT5:
+            inputs = mux_inputs[3];
+            break;
+        default:
+            log_error("Bad MUX2 node:%s\n", ctx->nameOf(ci_cursor));
+        }
+        for (int i = 0; i < 2; ++i) {
+            // input src
+            NetInfo *in = ci_cursor->getPort(inputs[i].port);
+            NPNR_ASSERT(in && in->driver.cell);
+            int child_dx = dx + inputs[i].dx;
+            int child_dz = dz + inputs[i].dz;
+            ci_root.constr_children.push_back(in->driver.cell);
+            in->driver.cell->cluster = ci_root.name;
+            in->driver.cell->constr_abs_z = false;
+            in->driver.cell->constr_x = child_dx;
+            in->driver.cell->constr_y = 0;
+            in->driver.cell->constr_z = child_dz;
+            make_cluster(ci_root, in->driver.cell, child_dx, child_dz);
+        }
+    };
+
+    // look for MUX2
+    // MUX2_LUT8 create right away, collect others
+    std::vector<IdString> muxes[3];
+    int packed[4] = {0, 0, 0, 0};
+    for (auto &cell : ctx->cells) {
+        auto &ci = *cell.second;
+        if (ci.cluster != ClusterId()) {
+            continue;
+        }
+        if (ci.type == id_MUX2_LUT8) {
+            ci.cluster = ci.name;
+            ci.constr_abs_z = 0;
+            make_cluster(ci, &ci, 0, 0);
+            ++packed[0];
+            continue;
+        }
+        if (ci.type.in(id_MUX2_LUT7, id_MUX2_LUT6, id_MUX2_LUT5)) {
+            switch (ci.type.hash()) {
+            case ID_MUX2_LUT7:
+                muxes[0].push_back(cell.first);
+                break;
+            case ID_MUX2_LUT6:
+                muxes[1].push_back(cell.first);
+                break;
+            default: // ID_MUX2_LUT5
+                muxes[2].push_back(cell.first);
+                break;
+            }
+        }
+    }
+    // create others
+    for (int i = 0; i < 3; ++i) {
+        for (IdString cell_name : muxes[i]) {
+            auto &ci = *ctx->cells.at(cell_name);
+            if (ci.cluster != ClusterId()) {
+                continue;
+            }
+            ci.cluster = ci.name;
+            ci.constr_abs_z = 0;
+            make_cluster(ci, &ci, 0, 0);
+            ++packed[i + 1];
+        }
+    }
+    log_info("Packed MUX2_LUT8:%d, MUX2_LUT7:%d, MUX2_LUT6:%d, MUX2_LUT5:%d\n", packed[0], packed[1], packed[2],
+             packed[3]);
+}
+
 NEXTPNR_NAMESPACE_END
diff --git a/himbaechel/uarch/gowin/gowin.h b/himbaechel/uarch/gowin/gowin.h
index 1fa7d9de..ed18cdab 100644
--- a/himbaechel/uarch/gowin/gowin.h
+++ b/himbaechel/uarch/gowin/gowin.h
@@ -45,6 +45,8 @@ struct GowinImpl : HimbaechelAPI
     // modify LUTs with constant inputs
     void mod_lut_inputs(void);
 
+    void pack_wideluts(void);
+
     // Return true if a cell is a LUT
     inline bool type_is_lut(IdString cell_type) const { return cell_type.in(id_LUT1, id_LUT2, id_LUT3, id_LUT4); }
     inline bool is_lut(const CellInfo *cell) const { return type_is_lut(cell->type); }
@@ -68,5 +70,21 @@ struct GowinArch : HimbaechelArch
 } exampleArch;
 } // namespace
 
+// Bels Z ranges. It is desirable that these numbers be synchronized with the chipdb generator
+namespace BelZ {
+enum
+{
+    LUT0_Z = 0,
+    LUT7_Z = 14,
+    MUX20_Z = 16,
+    MUX21_Z = 18,
+    MUX23_Z = 22,
+    MUX27_Z = 29,
+
+    VCC_Z = 277,
+    VSS_Z = 278
+};
+}
+
 NEXTPNR_NAMESPACE_END
 #endif
diff --git a/himbaechel/uarch/gowin/gowin_arch_gen.py b/himbaechel/uarch/gowin/gowin_arch_gen.py
index a40c76ae..08a1e5d8 100644
--- a/himbaechel/uarch/gowin/gowin_arch_gen.py
+++ b/himbaechel/uarch/gowin/gowin_arch_gen.py
@@ -12,8 +12,27 @@ from himbaechel_dbgen.chip import *
 from apycula import chipdb
 
 # Z of the bels
-VCC_Z = 277
-GND_Z = 288
+# sync with C++ part!
+LUT0_Z = 0 # z(DFFx) = z(LUTx) + 1
+LUT7_Z = 14
+MUX20_Z = 16
+MUX21_Z = 18
+MUX23_Z = 22
+MUX27_Z = 29
+
+VCC_Z = 277
+GND_Z = 278
+
+@dataclass
+class TileExtraData(BBAStruct):
+    tile_class: IdString # The general functionality of the slightly different tiles,
+                         # let's say the behavior of LUT+DFF in the tiles are completely identical,
+                         # but one of them also contains clock-wire switches,
+                         # then we assign them to the same LOGIC class.
+    def serialise_lists(self, context: str, bba: BBAWriter):
+        pass
+    def serialise(self, context: str, bba: BBAWriter):
+        bba.u32(self.tile_class.index)
 
 created_tiletypes = set()
 
@@ -72,6 +91,10 @@ def create_nodes(chip: Chip, db: chipdb):
                     nodes.append([NodeWire(x, y, f'{d}8{i}0'),
                                   NodeWire(*uturn(db, x + offs[0] * 4, y + offs[1] * 4, f'{d}8{i}4')),
                                   NodeWire(*uturn(db, x + offs[0] * 8, y + offs[1] * 8, f'{d}8{i}8'))])
+            # I0 for MUX2_LUT8: OF30 of this tile is joined to OF3 of the right-hand neighbour (both must be LOGIC)
+            if x < X - 1 and chip.tile_type_at(x, y).extra_data.tile_class == chip.strs.id('LOGIC') and chip.tile_type_at(x + 1, y).extra_data.tile_class == chip.strs.id('LOGIC'):
+                nodes.append([NodeWire(x, y, 'OF30'),
+                              NodeWire(x + 1, y, 'OF3')])
             for node in nodes:
                 chip.add_node(node)
             # VCC and VSS sources in the all tiles
@@ -99,7 +122,9 @@ def create_switch_matrix(tt: TileType, db: chipdb, x: int, y: int):
 def create_null_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
     if ttyp in created_tiletypes:
         return ttyp
-    tt = chip.create_tile_type(f"NULL_{ttyp}")
+    typename = "NULL"
+    tt = chip.create_tile_type(f"{typename}_{ttyp}")
+    tt.extra_data = TileExtraData(chip.strs.id(typename))
     create_switch_matrix(tt, db, x, y)
     return ttyp
 
@@ -107,7 +132,9 @@ def create_null_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
 def create_corner_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
     if ttyp in created_tiletypes:
         return ttyp
-    tt = chip.create_tile_type(f"CORNER_{ttyp}")
+    typename = "CORNER"
+    tt = chip.create_tile_type(f"{typename}_{ttyp}")
+    tt.extra_data = TileExtraData(chip.strs.id(typename))
 
     if x == 0 and y == 0:
         # GND is the logic low level generator
@@ -126,7 +153,10 @@ def create_corner_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
 def create_io_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
     if ttyp in created_tiletypes:
         return ttyp
-    tt = chip.create_tile_type(f"IO_{ttyp}")
+    typename = "IO"
+    tt = chip.create_tile_type(f"{typename}_{ttyp}")
+    tt.extra_data = TileExtraData(chip.strs.id(typename))
+
     for i in range(2):
         name = ['IOBA', 'IOBB'][i]
         # wires
@@ -142,13 +172,15 @@ def create_io_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
 
 # XXX lut+dff only for now
 def create_logic_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
-    N = 8
     lut_inputs = ['A', 'B', 'C', 'D']
     if ttyp in created_tiletypes:
         return ttyp
-    tt = chip.create_tile_type(f"LOGIC_{ttyp}")
+    typename = "LOGIC"
+    tt = chip.create_tile_type(f"{typename}_{ttyp}")
+    tt.extra_data = TileExtraData(chip.strs.id(typename))
+
     # setup wires
-    for i in range(N):
+    for i in range(8):
         for inp_name in lut_inputs:
             tt.create_wire(f"{inp_name}{i}", "LUT_INPUT")
         tt.create_wire(f"F{i}", "LUT_OUT")
@@ -162,9 +194,13 @@ def create_logic_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
         tt.create_wire(f"CLK{j}", "TILE_CLK")
         tt.create_wire(f"LSR{j}", "TILE_LSR")
         tt.create_wire(f"CE{j}", "TILE_CE")
+    for j in range(8):
+        tt.create_wire(f"OF{j}", "MUX_OUT")
+        tt.create_wire(f"SEL{j}", "MUX_SEL")
+    tt.create_wire("OF30", "MUX_OUT")
 
     # create logic cells
-    for i in range(N):
+    for i in range(8):
         # LUT
         lut = tt.create_bel(f"LUT{i}", "LUT4", z = (i * 2 + 0))
         for j, inp_name in enumerate(lut_inputs):
@@ -186,6 +222,30 @@ def create_logic_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int):
         tt.add_bel_pin(ff, "PRESET", f"LSR{i // 2}", PinType.INPUT)
         tt.add_bel_pin(ff, "CLEAR", f"LSR{i // 2}", PinType.INPUT)
         tt.add_bel_pin(ff, "CE", f"CE{i // 2}", PinType.INPUT)
+    # wide luts
+    for i in range(4):
+        ff = tt.create_bel(f"MUX{i * 2}", "MUX2_LUT5", z = MUX20_Z + i * 4)
+        tt.add_bel_pin(ff, "I0", f"F{i * 2}", PinType.INPUT)
+        tt.add_bel_pin(ff, "I1", f"F{i * 2 + 1}", PinType.INPUT)
+        tt.add_bel_pin(ff, "O", f"OF{i * 2}", PinType.OUTPUT)
+        tt.add_bel_pin(ff, "S0", f"SEL{i * 2}", PinType.INPUT)
+    for i in range(2):
+        ff = tt.create_bel(f"MUX{i * 4 + 1}", "MUX2_LUT6", z = MUX21_Z + i * 8)
+        tt.add_bel_pin(ff, "I0", f"OF{i * 4 + 2}", PinType.INPUT)
+        tt.add_bel_pin(ff, "I1", f"OF{i * 4}", PinType.INPUT)
+        tt.add_bel_pin(ff, "O", f"OF{i * 4 + 1}", PinType.OUTPUT)
+        tt.add_bel_pin(ff, "S0", f"SEL{i * 4 + 1}", PinType.INPUT)
+    ff = tt.create_bel(f"MUX3", "MUX2_LUT7", z = MUX23_Z)
+    tt.add_bel_pin(ff, "I0", f"OF5", PinType.INPUT)
+    tt.add_bel_pin(ff, "I1", f"OF1", PinType.INPUT)
+    tt.add_bel_pin(ff, "O", f"OF3", PinType.OUTPUT)
+    tt.add_bel_pin(ff, "S0", f"SEL3", PinType.INPUT)
+    ff = tt.create_bel(f"MUX7", "MUX2_LUT8", z = MUX27_Z)
+    tt.add_bel_pin(ff, "I0", f"OF30", PinType.INPUT)
+    tt.add_bel_pin(ff, "I1", f"OF3", PinType.INPUT)
+    tt.add_bel_pin(ff, "O", f"OF7", PinType.OUTPUT)
+    tt.add_bel_pin(ff, "S0", f"SEL7", PinType.INPUT)
+
     create_switch_matrix(tt, db, x, y)
     return ttyp
 
@@ -212,7 +272,7 @@ def main():
     # these differences (in case it turns out later that there is a slightly
     # different routing or something like that).
     logic_tiletypes = {12, 13, 14, 15, 16, 17}
-    io_tiletypes = {53, 58, 64} # Tangnano9k leds tiles and clock ;)
+    io_tiletypes = {53, 55, 58, 59, 64, 65}
     # Setup tile grid
     for x in range(X):
         for y in range(Y):