diff --git a/himbaechel/uarch/gowin/constids.inc b/himbaechel/uarch/gowin/constids.inc index 1b47db91..1dd3052c 100644 --- a/himbaechel/uarch/gowin/constids.inc +++ b/himbaechel/uarch/gowin/constids.inc @@ -873,6 +873,8 @@ X(DFFNCE) X(RAM16) X(RAMW) X(RAM16SDP4) +X(RAM16SDP2) +X(RAM16SDP1) X(WADA) X(WADB) X(WADC) diff --git a/himbaechel/uarch/gowin/gowin.cc b/himbaechel/uarch/gowin/gowin.cc index 9c691a8e..14a5cc0d 100644 --- a/himbaechel/uarch/gowin/gowin.cc +++ b/himbaechel/uarch/gowin/gowin.cc @@ -80,12 +80,15 @@ bool GowinImpl::isBelLocationValid(BelId bel, bool explain_invalid) const return true; } IdString bel_type = ctx->getBelType(bel); - if (bel_type.in(id_LUT4, id_DFF)) { + switch (bel_type.hash()) { + case ID_LUT4: /* fall-through */ + case ID_DFF: return slice_valid(l.x, l.y, l.z / 2); - } else { - if (bel_type == id_ALU) { - return slice_valid(l.x, l.y, l.z - BelZ::ALU0_Z); - } + case ID_ALU: + return slice_valid(l.x, l.y, l.z - BelZ::ALU0_Z); + case ID_RAM16SDP4: + // only slices 4 and 5 are critical for RAM + return slice_valid(l.x, l.y, l.z - BelZ::RAMW_Z + 5) && slice_valid(l.x, l.y, l.z - BelZ::RAMW_Z + 4); } return true; } @@ -102,6 +105,9 @@ IdString GowinImpl::getBelBucketForCellType(IdString cell_type) const if (type_is_dff(cell_type)) { return id_DFF; } + if (type_is_ssram(cell_type)) { + return id_RAM16SDP4; + } if (cell_type == id_GOWIN_GND) { return id_GND; } @@ -123,6 +129,9 @@ bool GowinImpl::isValidBelForCellType(IdString cell_type, BelId bel) const if (bel_type == id_DFF) { return type_is_dff(cell_type); } + if (bel_type == id_RAM16SDP4) { + return type_is_ssram(cell_type); + } if (bel_type == id_GND) { return cell_type == id_GOWIN_GND; } @@ -167,11 +176,20 @@ bool GowinImpl::slice_valid(int x, int y, int z) const const CellInfo *lut = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2))); const CellInfo *ff = ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z * 2 + 1))); const CellInfo *alu = 
ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, z + BelZ::ALU0_Z))); + const CellInfo *ramw = + (z == 4 || z == 5) ? ctx->getBoundBelCell(ctx->getBelByLocation(Loc(x, y, BelZ::RAMW_Z))) : nullptr; if (alu && lut) { return false; } + if (ramw) { + if (alu || ff || lut) { + return false; + } + return true; + } + // check for ALU/LUT in the adjacent cell int adj_lut_z = (1 - (z & 1) * 2 + z) * 2; int adj_alu_z = adj_lut_z / 2 + BelZ::ALU0_Z; diff --git a/himbaechel/uarch/gowin/gowin.h b/himbaechel/uarch/gowin/gowin.h index 9d2f736d..e8313038 100644 --- a/himbaechel/uarch/gowin/gowin.h +++ b/himbaechel/uarch/gowin/gowin.h @@ -20,6 +20,10 @@ inline bool is_dff(const CellInfo *cell) { return type_is_dff(cell->type); } // Return true if a cell is a ALU inline bool type_is_alu(IdString cell_type) { return cell_type == id_ALU; } inline bool is_alu(const CellInfo *cell) { return type_is_alu(cell->type); } + +// Return true if a cell is a SSRAM +inline bool type_is_ssram(IdString cell_type) { return cell_type.in(id_RAM16SDP1, id_RAM16SDP2, id_RAM16SDP4); } +inline bool is_ssram(const CellInfo *cell) { return type_is_ssram(cell->type); } } // namespace // Bels Z ranges. 
It is desirable that these numbers be synchronized with the chipdb generator @@ -33,6 +37,7 @@ enum MUX23_Z = 22, MUX27_Z = 29, ALU0_Z = 30, // :35, 6 ALU + RAMW_Z = 36, // RAM16SDP4 VCC_Z = 277, VSS_Z = 278 diff --git a/himbaechel/uarch/gowin/gowin_arch_gen.py b/himbaechel/uarch/gowin/gowin_arch_gen.py index c3ec84aa..86e6f216 100644 --- a/himbaechel/uarch/gowin/gowin_arch_gen.py +++ b/himbaechel/uarch/gowin/gowin_arch_gen.py @@ -20,6 +20,7 @@ MUX21_Z = 18 MUX23_Z = 22 MUX27_Z = 29 ALU0_Z = 30 # : 35, 6 ALUs +RAMW_Z = 36 # RAM16SDP4 VCC_Z = 277 GND_Z = 278 @@ -137,17 +138,17 @@ def create_switch_matrix(tt: TileType, db: chipdb, x: int, y: int): def create_null_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): if ttyp in created_tiletypes: - return ttyp + return ttyp, None typename = "NULL" tt = chip.create_tile_type(f"{typename}_{ttyp}") tt.extra_data = TileExtraData(chip.strs.id(typename)) create_switch_matrix(tt, db, x, y) - return ttyp + return (ttyp, tt) # responsible nodes, there will be IO banks, configuration, etc. 
def create_corner_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): if ttyp in created_tiletypes: - return ttyp + return ttyp, None typename = "CORNER" tt = chip.create_tile_type(f"{typename}_{ttyp}") tt.extra_data = TileExtraData(chip.strs.id(typename)) @@ -163,12 +164,12 @@ def create_corner_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): tt.add_bel_pin(gnd, "V", "VCC", PinType.OUTPUT) create_switch_matrix(tt, db, x, y) - return ttyp + return (ttyp, tt) # simple IO - only A and B def create_io_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): if ttyp in created_tiletypes: - return ttyp + return ttyp, None typename = "IO" tt = chip.create_tile_type(f"{typename}_{ttyp}") tt.extra_data = TileExtraData(chip.strs.id(typename)) @@ -184,12 +185,12 @@ def create_io_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): tt.add_bel_pin(io, "I", portmap['I'], PinType.INPUT) tt.add_bel_pin(io, "O", portmap['O'], PinType.OUTPUT) create_switch_matrix(tt, db, x, y) - return ttyp + return (ttyp, tt) # logic: luts, dffs, alu etc def create_logic_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): if ttyp in created_tiletypes: - return ttyp + return ttyp, None typename = "LOGIC" tt = chip.create_tile_type(f"{typename}_{ttyp}") tt.extra_data = TileExtraData(chip.strs.id(typename)) @@ -281,7 +282,30 @@ def create_logic_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): tt.add_bel_pin(ff, "S0", f"SEL7", PinType.INPUT) create_switch_matrix(tt, db, x, y) - return ttyp + return (ttyp, tt) + +def create_ssram_tiletype(chip: Chip, db: chipdb, x: int, y: int, ttyp: int): + if ttyp in created_tiletypes: + return ttyp, None + # SSRAM is LUT based, so it's logic-like + ttyp, tt = create_logic_tiletype(chip, db, x, y, ttyp) + + lut_inputs = ['A', 'B', 'C', 'D'] + ff = tt.create_bel(f"RAM16SDP4", "RAM16SDP4", z = RAMW_Z) + for i in range(4): + tt.add_bel_pin(ff, f"DI[{i}]", f"{lut_inputs[i]}5", PinType.INPUT) + tt.add_bel_pin(ff, 
f"WAD[{i}]", f"{lut_inputs[i]}4", PinType.INPUT) + # RAD[0] is assumed to be connected to A3, A2, A1 and A0. But + # for now we connect it only to A0, the others will be connected + # directly during packing. RAD[1...3] - similarly. + tt.add_bel_pin(ff, f"RAD[{i}]", f"{lut_inputs[i]}0", PinType.INPUT) + tt.add_bel_pin(ff, f"DO[{i}]", f"F{i}", PinType.OUTPUT) + + + tt.add_bel_pin(ff, f"CLK", "CLK2", PinType.INPUT) + tt.add_bel_pin(ff, f"CE", "CE2", PinType.INPUT) + tt.add_bel_pin(ff, f"WRE", "LSR2", PinType.INPUT) + return (ttyp, tt) def main(): parser = argparse.ArgumentParser(description='Make Gowin BBA') @@ -305,28 +329,33 @@ def main(): # The manufacturer distinguishes by externally identical tiles, so keep # these differences (in case it turns out later that there is a slightly # different routing or something like that). - logic_tiletypes = {12, 13, 14, 15, 16, 17} + logic_tiletypes = {12, 13, 14, 15, 16} io_tiletypes = {53, 55, 58, 59, 64, 65} + ssram_tiletypes = {17, 18, 19} # Setup tile grid for x in range(X): for y in range(Y): ttyp = db.grid[y][x].ttyp if (x == 0 or x == X - 1) and (y == 0 or y == Y - 1): assert ttyp not in created_tiletypes, "Duplication of corner types" - ttyp = create_corner_tiletype(ch, db, x, y, ttyp) + ttyp, _ = create_corner_tiletype(ch, db, x, y, ttyp) created_tiletypes.add(ttyp) ch.set_tile_type(x, y, f"CORNER_{ttyp}") continue if ttyp in logic_tiletypes: - ttyp = create_logic_tiletype(ch, db, x, y, ttyp) + ttyp, _ = create_logic_tiletype(ch, db, x, y, ttyp) + created_tiletypes.add(ttyp) + ch.set_tile_type(x, y, f"LOGIC_{ttyp}") + elif ttyp in ssram_tiletypes: + ttyp, _ = create_ssram_tiletype(ch, db, x, y, ttyp) created_tiletypes.add(ttyp) ch.set_tile_type(x, y, f"LOGIC_{ttyp}") elif ttyp in io_tiletypes: - ttyp = create_io_tiletype(ch, db, x, y, ttyp) + ttyp, _ = create_io_tiletype(ch, db, x, y, ttyp) created_tiletypes.add(ttyp) ch.set_tile_type(x, y, f"IO_{ttyp}") else: - ttyp = create_null_tiletype(ch, db, x, y, 
ttyp) + ttyp, _ = create_null_tiletype(ch, db, x, y, ttyp) created_tiletypes.add(ttyp) ch.set_tile_type(x, y, f"NULL_{ttyp}") diff --git a/himbaechel/uarch/gowin/pack.cc b/himbaechel/uarch/gowin/pack.cc index 9989a430..7be4966d 100644 --- a/himbaechel/uarch/gowin/pack.cc +++ b/himbaechel/uarch/gowin/pack.cc @@ -384,9 +384,91 @@ struct GowinPacker int lutffs = h.constrain_cell_pairs(lut_outs, dff_ins, 1); log_info("Constrained %d LUTFF pairs.\n", lutffs); + } - log_info("Pack ALUs...\n"); - pack_alus(); + // =================================== + // SSRAM cluster + // =================================== + // create the LUT4 cell with the given index for an SSRAM cell + std::unique_ptr ssram_make_lut(Context *ctx, CellInfo *ci, int index) + { + IdString name_id = ctx->idf("%s_LUT%d", ci->name.c_str(ctx), index); + auto lut_ci = std::make_unique(ctx, name_id, id_LUT4); + if (index) { + for (IdString port : {id_I0, id_I1, id_I2, id_I3}) { + lut_ci->addInput(port); + } + } + IdString init_name = ctx->idf("INIT_%d", index); + if (ci->params.count(init_name)) { + lut_ci->params[id_INIT] = ci->params.at(init_name); + } else { + lut_ci->params[id_INIT] = std::string("1111111111111111"); + } + return lut_ci; + } + + void pack_ram16sdp4(void) + { + std::vector> new_cells; + + log_info("Pack RAMs...\n"); + for (auto &cell : ctx->cells) { + auto ci = cell.second.get(); + if (ci->cluster != ClusterId()) { + continue; + } + + if (is_ssram(ci)) { + // make cluster root + ci->cluster = ci->name; + ci->constr_abs_z = true; + ci->constr_x = 0; + ci->constr_y = 0; + ci->constr_z = BelZ::RAMW_Z; + + ci->addInput(id_CE); + ci->connectPort(id_CE, ctx->nets[ctx->id("$PACKER_VCC")].get()); + + // RAD networks + NetInfo *rad[4]; + for (int i = 0; i < 4; ++i) { + rad[i] = ci->getPort(ctx->idf("RAD[%d]", i)); + } + + // active LUTs + int luts_num = 4; + if (ci->type == id_RAM16SDP1) { + luts_num = 1; + } else { + if (ci->type == id_RAM16SDP2) { + luts_num = 2; + } + } + + // make actual storage cells + for (int i = 0; i < 4; 
++i) { + new_cells.push_back(std::move(ssram_make_lut(ctx, ci, i))); + CellInfo *lut_ci = new_cells.back().get(); + ci->constr_children.push_back(lut_ci); + lut_ci->cluster = ci->name; + lut_ci->constr_abs_z = true; + lut_ci->constr_x = 0; + lut_ci->constr_y = 0; + lut_ci->constr_z = i * 2; + // inputs + // LUT0 is already connected when generating the base + if (i && i < luts_num) { + for (int j = 0; j < 4; ++j) { + lut_ci->connectPort(ctx->idf("I%d", j), rad[j]); + } + } + } + } + } + for (auto &ncell : new_cells) { + ctx->cells[ncell->name] = std::move(ncell); + } } void run(void) @@ -396,6 +478,7 @@ struct GowinPacker pack_wideluts(); pack_alus(); constrain_lutffs(); + pack_ram16sdp4(); } }; } // namespace