himbaechel: Initial timing support

Signed-off-by: gatecat <gatecat@ds0.me>
This commit is contained in:
gatecat 2023-08-25 12:04:39 +02:00 committed by myrtle
parent 890d7f7617
commit 3e1e783873
9 changed files with 586 additions and 25 deletions

View File

@ -2,7 +2,7 @@
# tcl synth_viaduct_example.tcl {out.json}
yosys read_verilog -lib [file dirname [file normalize $argv0]]/example_prims.v
yosys hierarchy -check
yosys hierarchy -check -top top
yosys proc
yosys flatten
yosys tribuf -logic

View File

@ -63,6 +63,35 @@ Arch::Arch(ArchArgs args)
IdString::initialize_add(this, chip_info->extra_constids->bba_ids[i].get(),
i + chip_info->extra_constids->known_id_count);
}
// Select speed grade
if (args.speed.empty()) {
if (chip_info->speed_grades.ssize() == 0) {
// no timing information and no speed grade specified
speed_grade = nullptr;
} else if (chip_info->speed_grades.ssize() == 1) {
// speed grade not specified but only one available; use it
speed_grade = &(chip_info->speed_grades[0]);
} else {
std::string available_speeds = "";
for (const auto &speed_data : chip_info->speed_grades) {
if (!available_speeds.empty())
available_speeds += ", ";
available_speeds += IdString(speed_data.name).c_str(this);
}
log_error("Speed grade must be specified using --speed (available options: %s).\n",
available_speeds.c_str());
}
} else {
for (const auto &speed_data : chip_info->speed_grades) {
if (IdString(speed_data.name) == id(args.speed)) {
speed_grade = &speed_data;
break;
}
}
if (!speed_grade) {
log_error("Speed grade '%s' not found in database.\n", args.speed.c_str());
}
}
init_tiles();
}
@ -167,6 +196,7 @@ bool Arch::place()
bool Arch::route()
{
set_fast_pip_delays(true);
uarch->preRoute();
std::string router = str_or_default(settings, id("router"), defaultRouter);
bool result;
@ -181,6 +211,7 @@ bool Arch::route()
uarch->postRoute();
getCtx()->settings[getCtx()->id("route")] = 1;
archInfoToAttributes();
set_fast_pip_delays(false);
return result;
}
@ -190,6 +221,8 @@ void Arch::assignArchInfo()
for (auto &cell : cells) {
CellInfo *ci = cell.second.get();
ci->flat_index = cell_idx++;
if (speed_grade && ci->timing_index == -1)
ci->timing_index = get_cell_timing_idx(ci->type);
for (auto &port : ci->ports) {
// Default 1:1 cell:bel mapping
if (!ci->cell_bel_pins.count(port.first))
@ -259,4 +292,149 @@ const std::vector<std::string> Arch::availablePlacers = {"sa", "heap"};
const std::string Arch::defaultRouter = "router1";
const std::vector<std::string> Arch::availableRouters = {"router1", "router2"};
// Switch between the fast pip delay estimate (used while routing) and the full model that reads the
// per-wire load_cap / drive_res tables.
//
// While fast mode is active, bindPip() and unbindPip() do not update load_cap/drive_res and
// getPipDelay() does not read them. The tables therefore have to be rebuilt when *leaving* fast
// mode, so that they describe the pips that were bound while they were not being tracked.
void Arch::set_fast_pip_delays(bool fast_mode)
{
    if (!fast_mode && fast_pip_delays) {
        // The tables went stale while fast mode was active; rebuild them from the bound pips
        drive_res.clear();
        load_cap.clear();
        for (auto &net : nets) {
            for (auto &wire_pair : net.second->wires) {
                PipId pip = wire_pair.second.pip;
                // Wires that are not driven through a pip contribute nothing
                if (pip == PipId())
                    continue;
                auto &pip_data = chip_pip_info(chip_info, pip);
                auto pip_tmg = get_pip_timing(pip_data);
                if (pip_tmg != nullptr) {
                    WireId src = getPipSrcWire(pip), dst = getPipDstWire(pip);
                    load_cap[src] += pip_tmg->in_cap.slow_max;
                    // Flag bit 0 set (or no known upstream resistance) restarts the accumulation at this pip.
                    // NOTE(review): wires are visited in container order, so a source wire that has not been
                    // visited yet contributes 0 here - confirm whether a driver-to-sink walk is needed.
                    drive_res[dst] = (((pip_tmg->flags & 1) || !drive_res.count(src)) ? 0 : drive_res.at(src)) +
                                     pip_tmg->out_res.slow_max;
                }
            }
        }
    }
    fast_pip_delays = fast_mode;
}
// Helper for cell timing lookups
namespace {
// Return the index of the entry of `list` whose key, as extracted by `key_getter`, equals `key`,
// or -1 if there is no such entry. Lists with fewer than 7 entries are scanned front to back;
// longer lists are bisected, which relies on the entries being ordered by ascending key.
template <typename Tres, typename Tgetter, typename Tkey>
int db_binary_search(const RelSlice<Tres> &list, Tgetter key_getter, Tkey key)
{
    if (list.ssize() >= 7) {
        int lo = 0;
        int hi = list.ssize() - 1;
        while (lo <= hi) {
            const int mid = (lo + hi) / 2;
            if (key_getter(list[mid]) == key)
                return mid;
            if (key_getter(list[mid]) > key)
                hi = mid - 1; // match, if any, lies in the lower half
            else
                lo = mid + 1; // match, if any, lies in the upper half
        }
        return -1;
    }
    // Short list: a plain scan
    for (int pos = 0; pos < list.ssize(); pos++) {
        if (key_getter(list[pos]) == key)
            return pos;
    }
    return -1;
}
} // namespace
// Find `type_variant` in the cell_types table of the selected speed grade. Returns the index of the
// matching entry, or -1 when the table has none. speed_grade is dereferenced unconditionally.
int Arch::get_cell_timing_idx(IdString type_variant) const
{
    const auto variant_of = [](const CellTimingPOD &entry) { return entry.type_variant; };
    return db_binary_search(speed_grade->cell_types, variant_of, type_variant.index);
}
// Look up the combinational arc from `from_port` to `to_port` for the cell timing entry `type_idx`.
// On success stores (fast_min, slow_max) of the arc delay in `delay` and returns true; returns false,
// leaving `delay` untouched, when the destination pin or the arc is not in the database.
bool Arch::lookup_cell_delay(int type_idx, IdString from_port, IdString to_port, DelayQuad &delay) const
{
    NPNR_ASSERT(type_idx != -1);
    const auto &cell_tmg = speed_grade->cell_types[type_idx];

    // Arcs are stored on the pin they arrive at
    const int dst_idx = db_binary_search(
            cell_tmg.pins, [](const CellPinTimingPOD &p) { return p.pin; }, to_port.index);
    if (dst_idx == -1)
        return false;

    const auto &arcs = cell_tmg.pins[dst_idx].comb_arcs;
    const int src_idx = db_binary_search(
            arcs, [](const CellPinCombArcPOD &a) { return a.input; }, from_port.index);
    if (src_idx == -1)
        return false;

    const auto &arc_delay = arcs[src_idx].delay;
    delay = DelayQuad(arc_delay.fast_min, arc_delay.slow_max);
    return true;
}
// Return the register (clocked) arcs recorded for `port` in the cell timing entry `type_idx`,
// or nullptr when the database has no entry for that pin.
const RelSlice<CellPinRegArcPOD> *Arch::lookup_cell_seq_timings(int type_idx, IdString port) const
{
    NPNR_ASSERT(type_idx != -1);
    const auto &pins = speed_grade->cell_types[type_idx].pins;
    const int found = db_binary_search(
            pins, [](const CellPinTimingPOD &p) { return p.pin; }, port.index);
    return (found == -1) ? nullptr : &pins[found].reg_arcs;
}
// Derive the timing class of `port` from the cell timing entry `type_idx`.
// Pins missing from the database are TMG_IGNORE when `dir` is PORT_OUT and TMG_COMB_INPUT otherwise.
TimingPortClass Arch::lookup_port_tmg_type(int type_idx, IdString port, PortType dir) const
{
    NPNR_ASSERT(type_idx != -1);
    const auto &cell_tmg = speed_grade->cell_types[type_idx];
    const int found = db_binary_search(
            cell_tmg.pins, [](const CellPinTimingPOD &p) { return p.pin; }, port.index);
    if (found == -1)
        return (dir == PORT_OUT) ? TMG_IGNORE : TMG_COMB_INPUT;

    const auto &pin_tmg = cell_tmg.pins[found];
    const bool has_reg_arcs = pin_tmg.reg_arcs.ssize() > 0;
    if (dir != PORT_IN) {
        // If a clock-to-out entry exists, then this is a register output
        return has_reg_arcs ? TMG_REGISTER_OUTPUT : TMG_COMB_OUTPUT;
    }
    // Inputs: the clock flag takes priority over any register arcs on the pin
    if (pin_tmg.flags & CellPinTimingPOD::FLAG_CLK)
        return TMG_CLOCK_INPUT;
    return has_reg_arcs ? TMG_REGISTER_INPUT : TMG_COMB_INPUT;
}
// TODO: adding uarch overrides for these?
// Combinational delay through `cell` from `fromPort` to `toPort`, taken from the timing database.
// Returns false when the cell has no timing entry (timing_index == -1) or the arc is unknown.
bool Arch::getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort, DelayQuad &delay) const
{
    const int tmg_idx = cell->timing_index;
    // The lookup only runs for cells that have a database entry
    return (tmg_idx != -1) && lookup_cell_delay(tmg_idx, fromPort, toPort, delay);
}
// Classify `port` of `cell` for timing analysis using the timing database, and report through
// `clockInfoCount` how many entries getPortClockingInfo() can be asked for on that port.
// Cells without a timing database entry (timing_index == -1) are reported as TMG_IGNORE.
TimingPortClass Arch::getPortTimingClass(const CellInfo *cell, IdString port, int &clockInfoCount) const
{
    // Give the out-parameter a defined value on every path, including the early return below
    clockInfoCount = 0;
    if (cell->timing_index == -1)
        return TMG_IGNORE;
    auto type = lookup_port_tmg_type(cell->timing_index, port, cell->ports.at(port).type);
    if (type == TMG_REGISTER_INPUT || type == TMG_REGISTER_OUTPUT) {
        // One clocking entry per register arc stored on the pin
        auto reg_arcs = lookup_cell_seq_timings(cell->timing_index, port);
        if (reg_arcs)
            clockInfoCount = reg_arcs->ssize();
    }
    return type;
}
// Return the `index`-th register arc of `port` as a TimingClockingInfo. The cell must have a timing
// database entry and the port must have register arcs; both are asserted.
TimingClockingInfo Arch::getPortClockingInfo(const CellInfo *cell, IdString port, int index) const
{
    NPNR_ASSERT(cell->timing_index != -1);
    const auto *arcs = lookup_cell_seq_timings(cell->timing_index, port);
    NPNR_ASSERT(arcs);
    const auto &entry = (*arcs)[index];

    TimingClockingInfo info;
    info.clock_port = IdString(entry.clock);
    info.edge = ClockEdge(entry.edge);
    // Each database value contributes its fast_min and slow_max corners
    info.setup = DelayPair(entry.setup.fast_min, entry.setup.slow_max);
    info.hold = DelayPair(entry.hold.fast_min, entry.hold.slow_max);
    info.clockToQ = DelayQuad(entry.clk_q.fast_min, entry.clk_q.slow_max);
    return info;
}
NEXTPNR_NAMESPACE_END

View File

@ -388,6 +388,7 @@ struct ArchArgs
std::string uarch;
std::string chipdb;
std::string device;
std::string speed;
dict<std::string, std::string> options;
};
@ -426,6 +427,7 @@ struct Arch : BaseArch<ArchRanges>
boost::iostreams::mapped_file_source blob_file;
const ChipInfoPOD *chip_info;
const PackageInfoPOD *package_info = nullptr;
const SpeedGradePOD *speed_grade = nullptr;
// Unlike Viaduct, we are not -generic based and therefore uarch must be non-nullptr
std::unique_ptr<HimbaechelAPI> uarch;
@ -522,7 +524,27 @@ struct Arch : BaseArch<ArchRanges>
{
return normalise_wire(pip.tile, chip_pip_info(chip_info, pip).dst_wire);
}
DelayQuad getPipDelay(PipId pip) const override { return DelayQuad(100); }
// Delay through `pip`: the pip's intrinsic delay plus an R*C term built from the source wire.
// In fast mode the load_cap/drive_res tables are ignored (both terms start at zero).
DelayQuad getPipDelay(PipId pip) const override
{
    const auto *tmg = get_pip_timing(chip_pip_info(chip_info, pip));
    if (tmg == nullptr) {
        // Pip with no specified delay. Return a notional value so the router still has something to work with.
        return DelayQuad(100);
    }
    // TODO: multi corner analysis
    const WireId src = getPipSrcWire(pip);
    uint64_t input_res = 0;
    uint64_t input_cap = 0;
    if (!fast_pip_delays) {
        if (drive_res.count(src))
            input_res = drive_res.at(src);
        if (load_cap.count(src))
            input_cap = load_cap.at(src);
    }
    // Half of the source wire's own resistance is added on top of the driver resistance
    const auto *src_tmg = get_node_timing(src);
    if (src_tmg != nullptr)
        input_res += (src_tmg->res.slow_max / 2);
    // Scale delay (fF * mOhm -> ps)
    delay_t total_delay = (input_res * input_cap) / uint64_t(1e6);
    total_delay += tmg->int_delay.slow_max;
    return DelayQuad(total_delay);
}
DownhillPipRange getPipsDownhill(WireId wire) const override
{
return DownhillPipRange(chip_info, get_tile_wire_range(wire));
@ -547,11 +569,29 @@ struct Arch : BaseArch<ArchRanges>
}
// Bind `pip` to `net`. Outside fast mode this first records the pip's input capacitance on its source
// wire and the resulting drive resistance on its destination wire, then notifies the uarch and
// performs the actual binding.
void bindPip(PipId pip, NetInfo *net, PlaceStrength strength) override
{
    // The load/drive tables are not maintained while fast pip delays are in use
    const PipTimingPOD *tmg = fast_pip_delays ? nullptr : get_pip_timing(chip_pip_info(chip_info, pip));
    if (tmg != nullptr) {
        const WireId src = getPipSrcWire(pip);
        load_cap[src] += tmg->in_cap.slow_max;
        // Resistance seen at the source carries through unless flag bit 0 is set or none is recorded
        uint64_t upstream_res = 0;
        if (!(tmg->flags & 1) && drive_res.count(src))
            upstream_res = drive_res.at(src);
        drive_res[getPipDstWire(pip)] = upstream_res + tmg->out_res.slow_max;
    }
    uarch->notifyPipChange(pip, net);
    BaseArch::bindPip(pip, net, strength);
}
// Unbind `pip`. Outside fast mode the pip's input capacitance is first taken off its source wire;
// the drive_res entry of the destination wire is not modified here.
void unbindPip(PipId pip) override
{
    const PipTimingPOD *tmg = fast_pip_delays ? nullptr : get_pip_timing(chip_pip_info(chip_info, pip));
    if (tmg != nullptr)
        load_cap[getPipSrcWire(pip)] -= tmg->in_cap.slow_max;
    uarch->notifyPipChange(pip, nullptr);
    BaseArch::unbindPip(pip);
}
@ -667,10 +707,65 @@ struct Arch : BaseArch<ArchRanges>
}
}
// -------------------------------------------------
// Timing class data for a pip in the selected speed grade, or nullptr when no speed grade is
// selected or the pip's timing_idx falls outside the grade's pip_classes table.
const PipTimingPOD *get_pip_timing(const PipDataPOD &pip_data) const
{
    if (!speed_grade)
        return nullptr;
    const int32_t cls = pip_data.timing_idx;
    const bool in_range = (cls >= 0) && (cls < speed_grade->pip_classes.ssize());
    return in_range ? &(speed_grade->pip_classes[cls]) : nullptr;
}
// Timing class data for a wire in the selected speed grade. The class index comes from the node
// shape for nodal wires and from the tile wire data otherwise. Returns nullptr when no speed grade
// is selected or the index falls outside the grade's node_classes table.
const NodeTimingPOD *get_node_timing(WireId wire) const
{
    if (!speed_grade)
        return nullptr;
    const int cls = is_nodal_wire(chip_info, wire.tile, wire.index)
                            ? chip_node_shape(chip_info, wire.tile, wire.index).timing_idx
                            : chip_wire_info(chip_info, wire).timing_idx;
    if (cls < 0 || cls >= speed_grade->node_classes.ssize())
        return nullptr;
    return &(speed_grade->node_classes[cls]);
}
// -------------------------------------------------
// Given cell type and variant, get the index inside the speed grade timing data
int get_cell_timing_idx(IdString type_variant) const;
// Return true and set delay if a comb path exists in a given cell timing index
bool lookup_cell_delay(int type_idx, IdString from_port, IdString to_port, DelayQuad &delay) const;
// Get setup and hold time and associated clock for a given cell timing index and signal
const RelSlice<CellPinRegArcPOD> *lookup_cell_seq_timings(int type_idx, IdString port) const;
// Attempt to look up port type based on timing database
TimingPortClass lookup_port_tmg_type(int type_idx, IdString port, PortType dir) const;
// -------------------------------------------------
bool getCellDelay(const CellInfo *cell, IdString fromPort, IdString toPort, DelayQuad &delay) const override;
// Get the port class, also setting clockInfoCount to the number of TimingClockingInfos associated with a port
TimingPortClass getPortTimingClass(const CellInfo *cell, IdString port, int &clockInfoCount) const override;
// Get the TimingClockingInfo of a port
TimingClockingInfo getPortClockingInfo(const CellInfo *cell, IdString port, int index) const override;
// -------------------------------------------------
void init_tiles();
void set_fast_pip_delays(bool fast_mode);
std::vector<IdString> tile_name;
dict<IdString, int> tile_name2idx;
// Load capacitance and drive resistance for nodes
// TODO: does this `dict` hurt routing performance too much?
bool fast_pip_delays = false;
dict<WireId, uint64_t> drive_res;
dict<WireId, uint64_t> load_cap;
};
NEXTPNR_NAMESPACE_END

View File

@ -98,6 +98,7 @@ struct ArchNetInfo
struct ArchCellInfo : BaseClusterInfo
{
int flat_index;
int timing_index = -1;
dict<IdString, std::vector<IdString>> cell_bel_pins;
};

View File

@ -62,7 +62,8 @@ NPNR_PACKED_STRUCT(struct BelPinRefPOD {
NPNR_PACKED_STRUCT(struct TileWireDataPOD {
int32_t name;
int32_t wire_type;
int32_t flags; // 32 bits of arbitrary data
int32_t flags; // 32 bits of arbitrary data
int32_t timing_idx; // used only when the wire is not part of a node, otherwise node idx applies
RelSlice<int32_t> pips_uphill;
RelSlice<int32_t> pips_downhill;
RelSlice<BelPinRefPOD> bel_pins;
@ -85,7 +86,7 @@ NPNR_PACKED_STRUCT(struct RelTileWireRefPOD {
NPNR_PACKED_STRUCT(struct NodeShapePOD {
RelSlice<RelTileWireRefPOD> tile_wires;
int32_t timing_index;
int32_t timing_idx;
});
NPNR_PACKED_STRUCT(struct TileTypePOD {
@ -151,12 +152,6 @@ NPNR_PACKED_STRUCT(struct TimingValue {
int32_t slow_max;
});
NPNR_PACKED_STRUCT(struct BelPinTimingPOD {
TimingValue in_cap;
TimingValue drive_res;
TimingValue delay;
});
NPNR_PACKED_STRUCT(struct PipTimingPOD {
TimingValue int_delay;
TimingValue in_cap;
@ -186,19 +181,19 @@ NPNR_PACKED_STRUCT(struct CellPinCombArcPOD {
NPNR_PACKED_STRUCT(struct CellPinTimingPOD {
int32_t pin;
int32_t flags;
RelSlice<CellPinCombArcPOD> comb_arcs;
RelSlice<CellPinRegArcPOD> reg_arcs;
static constexpr int32_t FLAG_CLK = 1;
});
NPNR_PACKED_STRUCT(struct CellTimingPOD {
int32_t type;
int32_t variant;
int32_t type_variant;
RelSlice<CellPinTimingPOD> pins;
});
NPNR_PACKED_STRUCT(struct SpeedGradePOD {
int32_t name;
RelSlice<BelPinTimingPOD> bel_pin_classes;
RelSlice<PipTimingPOD> pip_classes;
RelSlice<NodeTimingPOD> node_classes;
RelSlice<CellTimingPOD> cell_types;

View File

@ -17,10 +17,13 @@ class BBAWriter:
def label(self, s):
    # Write a "label <s>" line to the output stream
    line = "label {}".format(s)
    print(line, file=self.f)
def u8(self, n, comment=""):
    # Write a "u8 <n> <comment>" line; anything that is not an int is rejected up front
    assert isinstance(n, int), n
    line = "u8 {} {}".format(n, comment)
    print(line, file=self.f)
def u16(self, n, comment=""):
    # Write a "u16 <n> <comment>" line; anything that is not an int is rejected up front
    assert isinstance(n, int), n
    line = "u16 {} {}".format(n, comment)
    print(line, file=self.f)
def u32(self, n, comment=""):
    # Write a "u32 <n> <comment>" line; anything that is not an int is rejected up front
    assert isinstance(n, int), n
    line = "u32 {} {}".format(n, comment)
    print(line, file=self.f)
def pop(self):
    # Write a bare "pop" line to the output stream
    out = self.f
    print("pop", file=out)

View File

@ -25,7 +25,7 @@ class BBAStruct(abc.ABC):
def serialise(self, context: str, bba: BBAWriter):
pass
@dataclass(eq=True, frozen=True)
@dataclass(eq=True, order=True, frozen=True)
class IdString:
index: int = 0
@ -144,6 +144,7 @@ class TileWireData:
name: IdString
wire_type: IdString
flags: int = 0
timing_idx: int = -1
# not serialised, but used to build the global constant networks
const_val: int = -1
@ -167,6 +168,7 @@ class TileWireData:
bba.u32(self.name.index)
bba.u32(self.wire_type.index)
bba.u32(self.flags)
bba.u32(self.timing_idx)
bba.slice(f"{context}_pips_uh", len(self.pips_uphill))
bba.slice(f"{context}_pips_dh", len(self.pips_downhill))
bba.slice(f"{context}_bel_pins", len(self.bel_pins))
@ -191,6 +193,7 @@ class PipData(BBAStruct):
@dataclass
class TileType(BBAStruct):
strs: StringPool
tmg: "TimingPool"
type_name: IdString
bels: list[BelData] = field(default_factory=list)
pips: list[PipData] = field(default_factory=list)
@ -223,11 +226,12 @@ class TileType(BBAStruct):
self._wire2idx[wire.name] = wire.index
self.wires.append(wire)
return wire
def create_pip(self, src: str, dst: str):
def create_pip(self, src: str, dst: str, timing_class: str=""):
# Create a pip between two tile wires in the tile type. Both wires should exist already.
src_idx = self._wire2idx[self.strs.id(src)]
dst_idx = self._wire2idx[self.strs.id(dst)]
pip = PipData(index=len(self.pips), src_wire=src_idx, dst_wire=dst_idx)
pip = PipData(index=len(self.pips), src_wire=src_idx, dst_wire=dst_idx,
timing_idx=self.tmg.pip_class_idx(timing_class))
self.wires[src_idx].pips_downhill.append(pip.index)
self.wires[dst_idx].pips_uphill.append(pip.index)
self.pips.append(pip)
@ -294,6 +298,8 @@ class TileWireRef(BBAStruct):
@dataclass
class NodeShape(BBAStruct):
wires: list[TileWireRef] = field(default_factory=list)
timing_index: int = -1
def key(self):
m = hashlib.sha1()
for wire in self.wires:
@ -310,7 +316,7 @@ class NodeShape(BBAStruct):
bba.u16(0) # alignment
def serialise(self, context: str, bba: BBAWriter):
bba.slice(f"{context}_wires", len(self.wires))
bba.u32(-1) # timing index (not yet used)
bba.u32(self.timing_index) # timing index (not yet used)
MODE_TILE_WIRE = 0x7000
MODE_IS_ROOT = 0x7001
@ -430,6 +436,247 @@ class PackageInfo(BBAStruct):
bba.u32(self.name.index)
bba.slice(f"{context}_pads", len(self.pads))
class TimingValue(BBAStruct):
    """A timing quantity with four corners: fast_min, fast_max, slow_min and slow_max.

    Corners that are not given fall back to an earlier one: fast_max to fast_min, slow_min to
    fast_min and slow_max to fast_max, so TimingValue(x) sets all four corners to x.
    Only an omitted corner (None) falls back; an explicit 0 is kept as 0.
    """
    def __init__(self, fast_min=0, fast_max=None, slow_min=None, slow_max=None):
        self.fast_min = fast_min
        # Compare against None rather than relying on truthiness, so that 0 is a usable value
        self.fast_max = fast_min if fast_max is None else fast_max
        self.slow_min = self.fast_min if slow_min is None else slow_min
        self.slow_max = self.fast_max if slow_max is None else slow_max

    def serialise_lists(self, context: str, bba: BBAWriter):
        # No out-of-line data
        pass

    def serialise(self, context: str, bba: BBAWriter):
        # Four consecutive 32-bit words, in corner order
        bba.u32(self.fast_min)
        bba.u32(self.fast_max)
        bba.u32(self.slow_min)
        bba.u32(self.slow_max)
@dataclass
class PipTiming(BBAStruct):
    """Timing data of one pip class within a speed grade."""
    # internal fixed delay in ps
    int_delay: TimingValue = field(default_factory=TimingValue)
    # internal capacitance in notional femtofarads
    in_cap: TimingValue = field(default_factory=TimingValue)
    # drive/output resistance in notional milliohms
    out_res: TimingValue = field(default_factory=TimingValue)
    # is_buffered etc
    flags: int = 0

    def serialise_lists(self, context: str, bba: BBAWriter):
        # No out-of-line data
        return None

    def serialise(self, context: str, bba: BBAWriter):
        # The three timing values in declaration order, followed by the flags word
        for value in (self.int_delay, self.in_cap, self.out_res):
            value.serialise(context, bba)
        bba.u32(self.flags)
@dataclass
class NodeTiming(BBAStruct):
    """Timing data of one node (wire) class within a speed grade."""
    # wire resistance in notional milliohms
    res: TimingValue = field(default_factory=TimingValue)
    # wire capacitance in notional femtofarads
    cap: TimingValue = field(default_factory=TimingValue)
    # fixed wire delay in ps
    delay: TimingValue = field(default_factory=TimingValue)

    def serialise_lists(self, context: str, bba: BBAWriter):
        # No out-of-line data
        return None

    def serialise(self, context: str, bba: BBAWriter):
        # Written in declaration order: res, cap, delay
        for value in (self.res, self.cap, self.delay):
            value.serialise(context, bba)
# Deliberately NOT a dataclass: decorating a field-less Enum with @dataclass installs an __eq__
# that reports all members as equal and sets __hash__ to None, which breaks comparisons and use
# of the members as dict keys or set elements.
class ClockEdge(Enum):
    """Clock edge a register timing arc refers to; the member value is what gets serialised."""
    RISING = 0
    FALLING = 1
@dataclass
class CellPinRegArc(BBAStruct):
    """One clocked timing arc of a cell pin: setup/hold for an input or clock-to-out for an output."""
    # NOTE(review): annotated as int, but serialise() reads `.index`, so an IdString-like object is expected
    clock: int
    edge: ClockEdge
    # setup time in ps
    setup: TimingValue = field(default_factory=TimingValue)
    # hold time in ps
    hold: TimingValue = field(default_factory=TimingValue)
    # clock to output time in ps
    clk_q: TimingValue = field(default_factory=TimingValue)

    def serialise_lists(self, context: str, bba: BBAWriter):
        # No out-of-line data
        return None

    def serialise(self, context: str, bba: BBAWriter):
        bba.u32(self.clock.index)
        bba.u32(self.edge.value)
        # setup, hold and clock-to-out follow in that order
        for value in (self.setup, self.hold, self.clk_q):
            value.serialise(context, bba)
@dataclass
class CellPinCombArc(BBAStruct):
    """One combinational timing arc, stored on the pin it arrives at."""
    # NOTE(review): annotated as int, but serialise() reads `.index`, so an IdString-like object is expected
    from_pin: int
    delay: TimingValue = field(default_factory=TimingValue)

    def serialise_lists(self, context: str, bba: BBAWriter):
        # No out-of-line data
        return None

    def serialise(self, context: str, bba: BBAWriter):
        source_id = self.from_pin.index
        bba.u32(source_id)
        self.delay.serialise(context, bba)
@dataclass
class CellPinTiming(BBAStruct):
    """All timing arcs that end at, or are clocked relative to, one pin of a cell type."""
    # NOTE(review): annotated as int, but serialise() reads `.index`, so an IdString-like object is expected
    pin: int
    flags: int = 0
    # sorted by from_pin ID index
    comb_arcs: list[CellPinCombArc] = field(default_factory=list)
    # sorted by clock ID index
    reg_arcs: list[CellPinRegArc] = field(default_factory=list)

    def set_clock(self):
        # Flag bit 0 marks the pin as a clock
        self.flags = self.flags | 1

    def finalise(self):
        # Put both arc lists into the order the database lookups expect
        def by_source(arc):
            return arc.from_pin

        def by_clock(arc):
            return arc.clock

        self.comb_arcs.sort(key=by_source)
        self.reg_arcs.sort(key=by_clock)

    def serialise_lists(self, context: str, bba: BBAWriter):
        # Combinational arcs first, then register arcs, each under its own label
        for suffix, arcs in (("comb", self.comb_arcs), ("reg", self.reg_arcs)):
            bba.label(f"{context}_{suffix}")
            for idx, arc in enumerate(arcs):
                arc.serialise(f"{context}_{suffix}{idx}", bba)

    def serialise(self, context: str, bba: BBAWriter):
        bba.u32(self.pin.index) # pin idstring
        bba.u32(self.flags)
        for suffix, arcs in (("comb", self.comb_arcs), ("reg", self.reg_arcs)):
            bba.slice(f"{context}_{suffix}", len(arcs))
class CellTiming(BBAStruct):
    """Timing arcs of one cell type/variant, collected per pin and serialised after finalise()."""
    def __init__(self, strs: StringPool, type_variant: str):
        self.strs = strs
        self.type_variant = strs.id(type_variant)
        self.pin_data = {}

    def _pin_entry(self, name: str):
        # Fetch the timing record for a pin, creating it the first time the pin is mentioned
        if name not in self.pin_data:
            self.pin_data[name] = CellPinTiming(pin=self.strs.id(name))
        return self.pin_data[name]

    # combinational timing through a cell (like a LUT delay)
    def add_comb_arc(self, from_pin: str, to_pin: str, delay: TimingValue):
        dest = self._pin_entry(to_pin)
        dest.comb_arcs.append(CellPinCombArc(from_pin=self.strs.id(from_pin), delay=delay))

    # register input style timing (like a DFF input)
    def add_setup_hold(self, clock: str, input_pin: str, edge: ClockEdge, setup: TimingValue, hold: TimingValue):
        data = self._pin_entry(input_pin)
        clk = self._pin_entry(clock)
        data.reg_arcs.append(CellPinRegArc(clock=self.strs.id(clock), edge=edge, setup=setup, hold=hold))
        clk.set_clock()

    # register output style timing (like a DFF output)
    def add_clock_out(self, clock: str, output_pin: str, edge: ClockEdge, delay: TimingValue):
        out = self._pin_entry(output_pin)
        clk = self._pin_entry(clock)
        out.reg_arcs.append(CellPinRegArc(clock=self.strs.id(clock), edge=edge, clk_q=delay))
        clk.set_clock()

    def finalise(self):
        # Freeze the per-pin records into a list ordered by pin ID, then order each pin's arcs
        self.pins = sorted(self.pin_data.values(), key=lambda entry: entry.pin)
        for entry in self.pins:
            entry.finalise()

    def serialise_lists(self, context: str, bba: BBAWriter):
        # Arc lists of every pin come first, followed by the pin table itself
        for idx, entry in enumerate(self.pins):
            entry.serialise_lists(f"{context}_pin{idx}", bba)
        bba.label(f"{context}_pins")
        for idx, entry in enumerate(self.pins):
            entry.serialise(f"{context}_pin{idx}", bba)

    def serialise(self, context: str, bba: BBAWriter):
        bba.u32(self.type_variant.index) # type idstring
        bba.slice(f"{context}_pins", len(self.pins))
@dataclass
class SpeedGrade(BBAStruct):
    """Timing tables of one speed grade: pip classes, node classes and cell types."""
    # NOTE(review): annotated as int, but serialise() reads `.index`, so an IdString-like object is expected
    name: int
    # Indexed by class index; an entry still None when serialising fails on `.serialise`
    pip_classes: list[PipTiming|None] = field(default_factory=list)
    node_classes: list[NodeTiming|None] = field(default_factory=list)
    # sorted by type_variant ID in finalise()
    cell_types: list[CellTiming] = field(default_factory=list)

    def finalise(self):
        self.cell_types.sort(key=lambda cell: cell.type_variant)
        for cell in self.cell_types:
            cell.finalise()

    def serialise_lists(self, context: str, bba: BBAWriter):
        # Per-cell-type pin and arc lists go first
        for idx, cell in enumerate(self.cell_types):
            cell.serialise_lists(f"{context}_cellty{idx}", bba)
        # Then the three tables, each under its own label
        tables = (
            ("pip_classes", "pipc", self.pip_classes),
            ("node_classes", "nodec", self.node_classes),
            ("cell_types", "cellty", self.cell_types),
        )
        for label, prefix, entries in tables:
            bba.label(f"{context}_{label}")
            for idx, entry in enumerate(entries):
                entry.serialise(f"{context}_{prefix}{idx}", bba)

    def serialise(self, context: str, bba: BBAWriter):
        bba.u32(self.name.index) # speed grade idstring
        for label, entries in (("pip_classes", self.pip_classes),
                               ("node_classes", self.node_classes),
                               ("cell_types", self.cell_types)):
            bba.slice(f"{context}_{label}", len(entries))
class TimingPool(BBAStruct):
    """Registry of timing classes and per-speed-grade timing data for a chip database.

    Pip and node timing class names are mapped to indices that are shared by all speed grades;
    each SpeedGrade holds tables, indexed by class index, with the values for that grade.
    """
    def __init__(self, strs: StringPool):
        self.strs = strs
        self.speed_grades = []
        self.speed_grade_idx = {}
        self.pip_classes = {}
        self.node_classes = {}

    def set_speed_grades(self, speed_grades: list):
        # The list of speed grades can only be set once
        assert len(self.speed_grades) == 0
        self.speed_grades = [SpeedGrade(name=self.strs.id(g)) for g in speed_grades]
        self.speed_grade_idx = {g: i for i, g in enumerate(speed_grades)}

    @staticmethod
    def _class_idx(table: dict, name: str):
        # "" means "no timing class" (-1); any other name gets the next free index on first use
        if name == "":
            return -1
        if name not in table:
            table[name] = len(table)
        return table[name]

    @staticmethod
    def _grow(entries: list, idx: int):
        # Pad a per-grade table with None so that `idx` becomes a valid position
        if idx >= len(entries):
            entries.extend([None] * (idx + 1 - len(entries)))

    def pip_class_idx(self, name: str):
        return self._class_idx(self.pip_classes, name)

    def node_class_idx(self, name: str):
        return self._class_idx(self.node_classes, name)

    def set_pip_class(self, grade: str, name: str, delay: TimingValue,
            in_cap: Optional[TimingValue]=None, out_res: Optional[TimingValue]=None,
            is_buffered=True):
        # Define the timing of pip class `name` for speed grade `grade`; each (grade, class) pair
        # may be set once. Omitted in_cap/out_res default to an all-zero TimingValue.
        assert name != "", "pip class name must not be empty"
        idx = self.pip_class_idx(name)
        sg = self.speed_grades[self.speed_grade_idx[grade]]
        self._grow(sg.pip_classes, idx)
        assert sg.pip_classes[idx] is None, f"attempting to set pip class {name} in speed grade {grade} twice"
        sg.pip_classes[idx] = PipTiming(
            int_delay=delay,
            # None would fail later, when the value is serialised
            in_cap=TimingValue() if in_cap is None else in_cap,
            out_res=TimingValue() if out_res is None else out_res,
            flags=(1 if is_buffered else 0))

    def set_bel_pin_class(self, grade: str, name: str, delay: TimingValue,
            in_cap: Optional[TimingValue]=None, out_res: Optional[TimingValue]=None):
        # bel pin classes are shared with pip classes, but this alias adds a bit of extra clarity
        self.set_pip_class(grade, name, delay, in_cap, out_res, is_buffered=True)

    def set_node_class(self, grade: str, name: str, delay: TimingValue,
            res: Optional[TimingValue]=None, cap: Optional[TimingValue]=None):
        # Define the timing of node class `name` for speed grade `grade`; each (grade, class) pair
        # may be set once. Omitted res/cap default to an all-zero TimingValue.
        assert name != "", "node class name must not be empty"
        idx = self.node_class_idx(name)
        sg = self.speed_grades[self.speed_grade_idx[grade]]
        self._grow(sg.node_classes, idx)
        assert sg.node_classes[idx] is None, f"attempting to set node class {name} in speed grade {grade} twice"
        sg.node_classes[idx] = NodeTiming(
            delay=delay,
            res=TimingValue() if res is None else res,
            cap=TimingValue() if cap is None else cap)

    def add_cell_variant(self, speed_grade: str, name: str):
        # Create an empty CellTiming for cell type/variant `name` in the given speed grade and return it
        cell = CellTiming(self.strs, name)
        self.speed_grades[self.speed_grade_idx[speed_grade]].cell_types.append(cell)
        return cell

    def finalise(self):
        for sg in self.speed_grades:
            sg.finalise()
class Chip:
def __init__(self, uarch: str, name: str, width: int, height: int):
self.strs = StringPool()
@ -446,8 +693,9 @@ class Chip:
self.tile_shapes_idx = dict()
self.packages = []
self.extra_data = None
self.timing = TimingPool(self.strs)
def create_tile_type(self, name: str):
tt = TileType(self.strs, self.strs.id(name))
tt = TileType(self.strs, self.timing, self.strs.id(name))
self.tile_type_idx[name] = len(self.tile_types)
self.tile_types.append(tt)
return tt
@ -456,6 +704,9 @@ class Chip:
def tile_type_at(self, x: int, y: int):
    # Return the tile type assigned to grid location (x, y); asserts that one has been set
    type_idx = self.tiles[y][x].type_idx
    assert type_idx is not None, f"tile type at ({x}, {y}) must be set"
    return self.tile_types[type_idx]
def set_speed_grades(self, speed_grades: list):
    # Forward the speed grade names to the timing pool and return the pool so the caller
    # can continue configuring timing on it
    pool = self.timing
    pool.set_speed_grades(speed_grades)
    return pool
def add_node(self, wires: list[NodeWire]):
# add a node - joining between multiple tile wires into a single connection (from nextpnr's point of view)
# all the tile wires must exist, and the tile types must be set, first
@ -529,7 +780,8 @@ class Chip:
for y, row in enumerate(self.tiles):
for x, tinst in enumerate(row):
tinst.serialise_lists(f"tinst_{x}_{y}", bba)
for i, sg in enumerate(self.timing.speed_grades):
sg.serialise_lists(f"sg{i}", bba)
self.strs.serialise_lists(f"constids", bba)
if self.extra_data is not None:
self.extra_data.serialise_lists("extra_data", bba)
@ -552,7 +804,9 @@ class Chip:
for y, row in enumerate(self.tiles):
for x, tinst in enumerate(row):
tinst.serialise(f"tinst_{x}_{y}", bba)
bba.label(f"speed_grades")
for i, sg in enumerate(self.timing.speed_grades):
sg.serialise(f"sg{i}", bba)
bba.label(f"constids")
self.strs.serialise(f"constids", bba)
@ -573,8 +827,7 @@ class Chip:
# packages
bba.slice("packages", len(self.packages))
# speed grades: not yet used
bba.u32(0)
bba.u32(0)
bba.slice("speed_grades", len(self.timing.speed_grades))
# db-defined constids
bba.ref("constids")
# extra data
@ -584,6 +837,7 @@ class Chip:
bba.u32(0)
def write_bba(self, filename):
self.timing.finalise()
with open(filename, "w") as f:
bba = BBAWriter(f)
bba.pre('#include \"nextpnr.h\"')

View File

@ -49,6 +49,7 @@ po::options_description HimbaechelCommandHandler::getArchOptions()
po::options_description specific("Architecture specific options");
specific.add_options()("uarch", po::value<std::string>(), uarch_help.c_str());
specific.add_options()("chipdb", po::value<std::string>(), "path to chip database file");
specific.add_options()("speed", po::value<std::string>(), "device speed grade");
specific.add_options()("vopt,o", po::value<std::vector<std::string>>(), "options to pass to the himbächel uarch");
return specific;
@ -70,6 +71,8 @@ std::unique_ptr<Context> HimbaechelCommandHandler::createContext(dict<std::strin
log_error("chip database path must be specified.\n");
chipArgs.uarch = vm["uarch"].as<std::string>();
chipArgs.chipdb = vm["chipdb"].as<std::string>();
if (vm.count("speed"))
chipArgs.speed = vm["speed"].as<std::string>();
if (vm.count("vopt")) {
std::vector<std::string> options = vm["vopt"].as<std::vector<std::string>>();
for (const auto &opt : options) {

View File

@ -40,15 +40,15 @@ def create_switch_matrix(tt: TileType, inputs: list[str], outputs: list[str]):
# input pips
for i, w in enumerate(inputs):
for j in range((i % Si), Wl, Si):
tt.create_pip(f"SWITCH{j}", w)
tt.create_pip(f"SWITCH{j}", w, timing_class="SWINPUT")
# output pips
for i, w in enumerate(outputs):
for j in range((i % Sq), Wl, Sq):
tt.create_pip(w, f"SWITCH{j}")
tt.create_pip(w, f"SWITCH{j}", timing_class="SWINPUT")
# neighbour local pips
for i in range(Wl):
for j, (d, dx, dy) in enumerate(dirs):
tt.create_pip(f"{d}{(i + j) % Wl}", f"SWITCH{i}")
tt.create_pip(f"{d}{(i + j) % Wl}", f"SWITCH{i}", timing_class="SWNEIGH")
# clock "ladder"
if not tt.has_wire("CLK"):
tt.create_wire(f"CLK", "TILE_CLK")
@ -185,7 +185,37 @@ def create_nodes(ch):
clk_node.append(NodeWire(x, y+1, "CLK_PREV"))
ch.add_node(clk_node)
def set_timings(ch):
    # Populate the example chip with a single speed grade of routing and cell timing data
    speed = "DEFAULT"
    tmg = ch.set_speed_grades([speed])
    # --- Routing Delays ---
    # Notes: A simpler timing model could just use intrinsic delay and ignore R and Cs.
    # R and C values don't have to be physically realistic, just in agreement with themselves to provide
    # a meaningful scaling of delay with fanout. Units are subject to change.
    routing_classes = (
        # (class name, intrinsic delay, input capacitance, output resistance)
        ("SWINPUT", 80, 5000, 1000),    # 80ps, 5pF, 1ohm
        ("SWOUTPUT", 100, 5000, 800),   # 100ps, 5pF, 0.8ohm
        ("SWNEIGH", 120, 7000, 1200),   # 120ps, 7pF, 1.2ohm
    )
    for class_name, delay, cap, res in routing_classes:
        tmg.set_pip_class(grade=speed, name=class_name,
            delay=TimingValue(delay),
            in_cap=TimingValue(cap),
            out_res=TimingValue(res),
        )
    # TODO: also support node/wire delays and add an example of them
    # --- Cell delays ---
    # LUT: one input-to-output arc per input, 15 more per input index
    lut = ch.timing.add_cell_variant(speed, "LUT4")
    for j in range(K):
        lut.add_comb_arc(f"I[{j}]", "F", TimingValue(150 + j * 15))
    # DFF: setup/hold on D and clock-to-out on Q, both relative to the rising edge of CLK
    dff = ch.timing.add_cell_variant(speed, "DFF")
    dff.add_setup_hold("CLK", "D", ClockEdge.RISING, TimingValue(150), TimingValue(25))
    dff.add_clock_out("CLK", "Q", ClockEdge.RISING, TimingValue(200))
def main():
ch = Chip("example", "EX1", X, Y)
@ -211,6 +241,8 @@ def main():
ch.set_tile_type(x, y, "LOGIC")
# Create nodes between tiles
create_nodes(ch)
set_timings(ch)
ch.write_bba(sys.argv[1])
if __name__ == '__main__':
main()