from os import path import sys sys.path.append(path.join(path.dirname(__file__), "../..")) from himbaechel_dbgen.chip import * # Grid size including IOBs at edges X = 100 Y = 100 # LUT input count K = 4 # SLICEs per tile N = 8 # number of local wires Wl = N * (K + 1) + 16 # 1/Fc for bel input wire pips; local wire pips and neighbour pips Si = 6 Sq = 6 Sl = 1 dirs = [ # name, dx, dy ("N", 0, -1), ("NE", 1, -1), ("E", 1, 0), ("SE", 1, 1), ("S", 0, 1), ("SW", -1, 1), ("W", -1, 0), ("NW", -1, -1) ] def create_switch_matrix(tt: TileType, inputs: list[str], outputs: list[str]): # FIXME: terrible routing matrix, just for a toy example... # constant wires tt.create_wire("GND", "GND", const_value="GND") tt.create_wire("VCC", "VCC", const_value="VCC") # switch wires for i in range(Wl): tt.create_wire(f"SWITCH{i}", "SWITCH") # neighbor wires for i in range(Wl): for d, dx, dy in dirs: tt.create_wire(f"{d}{i}", f"NEIGH_{d}") # input pips for i, w in enumerate(inputs): for j in range((i % Si), Wl, Si): tt.create_pip(f"SWITCH{j}", w, timing_class="SWINPUT") # output pips for i, w in enumerate(outputs): for j in range((i % Sq), Wl, Sq): tt.create_pip(w, f"SWITCH{j}", timing_class="SWINPUT") # constant pips for i in range(Wl): tt.create_pip("GND", f"SWITCH{i}") tt.create_pip("VCC", f"SWITCH{i}") # neighbour local pips for i in range(Wl): for j, (d, dx, dy) in enumerate(dirs): tt.create_pip(f"{d}{(i + j) % Wl}", f"SWITCH{i}", timing_class="SWNEIGH") # clock "ladder" if not tt.has_wire("CLK"): tt.create_wire(f"CLK", "TILE_CLK") tt.create_wire(f"CLK_PREV", "CLK_ROUTE") tt.create_pip(f"CLK_PREV", f"CLK") def create_logic_tiletype(chip: Chip): tt = chip.create_tile_type("LOGIC") # setup wires inputs = [] outputs = [] for i in range(N): for j in range(K): inputs.append(f"L{i}_I{j}") tt.create_wire(f"L{i}_I{j}", "LUT_INPUT") tt.create_wire(f"L{i}_D", "FF_DATA") tt.create_wire(f"L{i}_O", "LUT_OUT") tt.create_wire(f"L{i}_Q", "FF_OUT") outputs += [f"L{i}_O", f"L{i}_Q"] tt.create_wire(f"CLK", "TILE_CLK") # create logic cells for i in range(N): # LUT lut = tt.create_bel(f"L{i}_LUT", "LUT4", z=(i*2 + 0)) for j in range(K): tt.add_bel_pin(lut, f"I[{j}]", f"L{i}_I{j}", PinType.INPUT) tt.add_bel_pin(lut, "F", f"L{i}_O", PinType.OUTPUT) # FF data can come from LUT output or LUT I3 tt.create_pip(f"L{i}_O", f"L{i}_D") tt.create_pip(f"L{i}_I{K-1}", f"L{i}_D") # FF ff = tt.create_bel(f"L{i}_FF", "DFF", z=(i*2 + 1)) tt.add_bel_pin(ff, "D", f"L{i}_D", PinType.INPUT) tt.add_bel_pin(ff, "CLK", "CLK", PinType.INPUT) tt.add_bel_pin(ff, "Q", f"L{i}_Q", PinType.OUTPUT) create_switch_matrix(tt, inputs, outputs) return tt N_io = 2 def create_io_tiletype(chip: Chip): tt = chip.create_tile_type("IO") # setup wires inputs = [] outputs = [] for i in range(N_io): tt.create_wire(f"IO{i}_T", "IO_T") tt.create_wire(f"IO{i}_I", "IO_I") tt.create_wire(f"IO{i}_O", "IO_O") tt.create_wire(f"IO{i}_PAD", "IO_PAD") inputs += [f"IO{i}_T", f"IO{i}_I"] outputs += [f"IO{i}_O", ] tt.create_wire(f"CLK", "TILE_CLK") for i in range(N_io): io = tt.create_bel(f"IO{i}", "IOB", z=i) tt.add_bel_pin(io, "I", f"IO{i}_I", PinType.INPUT) tt.add_bel_pin(io, "T", f"IO{i}_T", PinType.INPUT) tt.add_bel_pin(io, "O", f"IO{i}_O", PinType.OUTPUT) tt.add_bel_pin(io, "PAD", f"IO{i}_PAD", PinType.INOUT) # Actually used in top left IO only tt.create_wire("GCLK_OUT", "GCLK") tt.create_pip("IO0_O", "GCLK_OUT") create_switch_matrix(tt, inputs, outputs) return tt def create_bram_tiletype(chip: Chip): Aw = 9 Dw = 16 tt = chip.create_tile_type("BRAM") inputs = [f"RAM_WA{i}" for i in range(Aw)] inputs += [f"RAM_RA{i}" for i in range(Aw)] inputs += [f"RAM_WE{i}" for i in range(Dw // 8)] inputs += [f"RAM_DI{i}" for i in range(Dw)] outputs = [f"RAM_DO{i}" for i in range(Dw)] for w in inputs: tt.create_wire(w, "RAM_IN") for w in outputs: tt.create_wire(w, "RAM_OUT") tt.create_wire(f"CLK", "TILE_CLK") ram = tt.create_bel(f"RAM", F"BRAM_{2**Aw}X{Dw}", z=0) tt.add_bel_pin(ram, "CLK", f"CLK", PinType.INPUT) for i in range(Aw): tt.add_bel_pin(ram, f"WA[{i}]", f"RAM_WA{i}", PinType.INPUT) tt.add_bel_pin(ram, f"RA[{i}]", f"RAM_RA{i}", PinType.INPUT) for i in range(Dw//8): tt.add_bel_pin(ram, f"WE[{i}]", f"RAM_WE{i}", PinType.INPUT) for i in range(Dw): tt.add_bel_pin(ram, f"DI[{i}]", f"RAM_DI{i}", PinType.INPUT) tt.add_bel_pin(ram, f"DO[{i}]", f"RAM_DO{i}", PinType.OUTPUT) create_switch_matrix(tt, inputs, outputs) return tt def create_corner_tiletype(ch): tt = ch.create_tile_type("NULL") tt.create_wire(f"CLK", "TILE_CLK") tt.create_wire(f"CLK_PREV", "CLK_ROUTE") tt.create_pip(f"CLK_PREV", f"CLK") tt.create_wire(f"GND", "GND", const_value="GND") tt.create_wire(f"VCC", "VCC", const_value="VCC") gnd = tt.create_bel(f"GND_DRV", f"GND_DRV", z=0) tt.add_bel_pin(gnd, "GND", "GND", PinType.OUTPUT) vcc = tt.create_bel(f"VCC_DRV", f"VCC_DRV", z=1) tt.add_bel_pin(vcc, "VCC", "VCC", PinType.OUTPUT) return tt def is_corner(x, y): return ((x == 0) or (x == (X-1))) and ((y == 0) or (y == (Y-1))) def create_nodes(ch): for y in range(Y): #print(f"generating nodes for row {y}") for x in range(X): if not is_corner(x, y): # connect up actual neighbours local_nodes = [[NodeWire(x, y, f"SWITCH{i}")] for i in range(Wl)] for d, dx, dy in dirs: x1 = x - dx y1 = y - dy if x1 < 0 or x1 >= X or y1 < 0 or y1 >= Y or is_corner(x1, y1): continue for i in range(Wl): local_nodes[i].append(NodeWire(x1, y1, f"{d}{i}")) for n in local_nodes: ch.add_node(n) # connect up clock ladder (not intended to be a sensible clock structure) if y != 1: # special case where the node has 3 wires if y == 0: if x == 0: # clock source: IO clk_node = [NodeWire(1, 0, "GCLK_OUT")] else: # clock source: left clk_node = [NodeWire(x-1, y, "CLK")] else: # clock source: above clk_node = [NodeWire(x, y-1, "CLK")] clk_node.append(NodeWire(x, y, "CLK_PREV")) if y == 0: clk_node.append(NodeWire(x, y+1, "CLK_PREV")) ch.add_node(clk_node) def set_timings(ch): speed = "DEFAULT" tmg = ch.set_speed_grades([speed]) # --- Routing Delays --- # Notes: A simpler timing model could just use intrinsic delay and ignore R and Cs. # R and C values don't have to be physically realistic, just in agreement with themselves to provide # a meaningful scaling of delay with fanout. Units are subject to change. tmg.set_pip_class(grade=speed, name="SWINPUT", delay=TimingValue(80), # 80ps intrinstic delay in_cap=TimingValue(5000), # 5pF out_res=TimingValue(1000), # 1ohm ) tmg.set_pip_class(grade=speed, name="SWOUTPUT", delay=TimingValue(100), # 100ps intrinstic delay in_cap=TimingValue(5000), # 5pF out_res=TimingValue(800), # 0.8ohm ) tmg.set_pip_class(grade=speed, name="SWNEIGH", delay=TimingValue(120), # 120ps intrinstic delay in_cap=TimingValue(7000), # 7pF out_res=TimingValue(1200), # 1.2ohm ) # TODO: also support node/wire delays and add an example of them # --- Cell delays --- lut = ch.timing.add_cell_variant(speed, "LUT4") for j in range(K): lut.add_comb_arc(f"I[{j}]", "F", TimingValue(150 + j * 15)) dff = ch.timing.add_cell_variant(speed, "DFF") dff.add_setup_hold("CLK", "D", ClockEdge.RISING, TimingValue(150), TimingValue(25)) dff.add_clock_out("CLK", "Q", ClockEdge.RISING, TimingValue(200)) def main(): ch = Chip("example", "EX1", X, Y) # Init constant ids ch.strs.read_constids(path.join(path.dirname(__file__), "constids.inc")) logic = create_logic_tiletype(ch) io = create_io_tiletype(ch) bram = create_bram_tiletype(ch) null = create_corner_tiletype(ch) # Setup tile grid for x in range(X): for y in range(Y): if x == 0 or x == X-1: # left/right side IO if y == 0 or y == Y-1: # corner ch.set_tile_type(x, y, "NULL") else: ch.set_tile_type(x, y, "IO") elif y == 0 or y == Y-1: # top/bottom side IO ch.set_tile_type(x, y, "IO") elif (y % 15) == 7: # BRAM ch.set_tile_type(x, y, "BRAM") else: ch.set_tile_type(x, y, "LOGIC") # Create nodes between tiles create_nodes(ch) set_timings(ch) ch.write_bba(sys.argv[1]) if __name__ == '__main__': main()