Merge pull request #609 from YosysHQ/gatecat/sta-v2
Use new timing engine for criticality
This commit is contained in:
commit
326b34887c
@ -552,6 +552,10 @@ struct DelayPair
|
|||||||
{
|
{
|
||||||
return {min_delay + other.min_delay, max_delay + other.max_delay};
|
return {min_delay + other.min_delay, max_delay + other.max_delay};
|
||||||
}
|
}
|
||||||
|
// Component-wise difference: subtracts the other pair's minimum from this
// minimum and its maximum from this maximum.
DelayPair operator-(const DelayPair &other) const
{
    return DelayPair(min_delay - other.min_delay, max_delay - other.max_delay);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// four-quadrant, min and max rise and fall delay
|
// four-quadrant, min and max rise and fall delay
|
||||||
@ -575,6 +579,7 @@ struct DelayQuad
|
|||||||
DelayPair delayPair() const { return DelayPair(minDelay(), maxDelay()); };
|
DelayPair delayPair() const { return DelayPair(minDelay(), maxDelay()); };
|
||||||
|
|
||||||
DelayQuad operator+(const DelayQuad &other) const { return {rise + other.rise, fall + other.fall}; }
|
DelayQuad operator+(const DelayQuad &other) const { return {rise + other.rise, fall + other.fall}; }
|
||||||
|
// Component-wise difference of the rise and fall parts.
DelayQuad operator-(const DelayQuad &other) const
{
    const auto rise_diff = rise - other.rise;
    const auto fall_diff = fall - other.fall;
    return {rise_diff, fall_diff};
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ClockConstraint;
|
struct ClockConstraint;
|
||||||
|
@ -78,7 +78,7 @@ class SAPlacer
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
SAPlacer(Context *ctx, Placer1Cfg cfg)
|
SAPlacer(Context *ctx, Placer1Cfg cfg)
|
||||||
: ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg)
|
: ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg), tmg(ctx)
|
||||||
{
|
{
|
||||||
for (auto bel : ctx->getBels()) {
|
for (auto bel : ctx->getBels()) {
|
||||||
Loc loc = ctx->getBelLocation(bel);
|
Loc loc = ctx->getBelLocation(bel);
|
||||||
@ -241,8 +241,9 @@ class SAPlacer
|
|||||||
auto saplace_start = std::chrono::high_resolution_clock::now();
|
auto saplace_start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
// Invoke timing analysis to obtain criticalities
|
// Invoke timing analysis to obtain criticalities
|
||||||
|
tmg.setup_only = true;
|
||||||
if (!cfg.budgetBased)
|
if (!cfg.budgetBased)
|
||||||
get_criticalities(ctx, &net_crit);
|
tmg.setup();
|
||||||
|
|
||||||
// Calculate costs after initial placement
|
// Calculate costs after initial placement
|
||||||
setup_costs();
|
setup_costs();
|
||||||
@ -379,7 +380,7 @@ class SAPlacer
|
|||||||
|
|
||||||
// Invoke timing analysis to obtain criticalities
|
// Invoke timing analysis to obtain criticalities
|
||||||
if (!cfg.budgetBased && cfg.timing_driven)
|
if (!cfg.budgetBased && cfg.timing_driven)
|
||||||
get_criticalities(ctx, &net_crit);
|
tmg.run();
|
||||||
// Need to rebuild costs after criticalities change
|
// Need to rebuild costs after criticalities change
|
||||||
setup_costs();
|
setup_costs();
|
||||||
// Reset incremental bounds
|
// Reset incremental bounds
|
||||||
@ -836,11 +837,9 @@ class SAPlacer
|
|||||||
double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user)));
|
double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user)));
|
||||||
return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget) / 10));
|
return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget) / 10));
|
||||||
} else {
|
} else {
|
||||||
auto crit = net_crit.find(net->name);
|
float crit = tmg.get_criticality(CellPortKey(net->users.at(user)));
|
||||||
if (crit == net_crit.end() || crit->second.criticality.empty())
|
|
||||||
return 0;
|
|
||||||
double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user)));
|
double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user)));
|
||||||
return delay * std::pow(crit->second.criticality.at(user), crit_exp);
|
return delay * std::pow(crit, crit_exp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1216,9 +1215,6 @@ class SAPlacer
|
|||||||
wirelen_t last_wirelen_cost, curr_wirelen_cost;
|
wirelen_t last_wirelen_cost, curr_wirelen_cost;
|
||||||
double last_timing_cost, curr_timing_cost;
|
double last_timing_cost, curr_timing_cost;
|
||||||
|
|
||||||
// Criticality data from timing analysis
|
|
||||||
NetCriticalityMap net_crit;
|
|
||||||
|
|
||||||
Context *ctx;
|
Context *ctx;
|
||||||
float temp = 10;
|
float temp = 10;
|
||||||
float crit_exp = 8;
|
float crit_exp = 8;
|
||||||
@ -1235,6 +1231,8 @@ class SAPlacer
|
|||||||
bool require_legal = true;
|
bool require_legal = true;
|
||||||
const int legalise_dia = 4;
|
const int legalise_dia = 4;
|
||||||
Placer1Cfg cfg;
|
Placer1Cfg cfg;
|
||||||
|
|
||||||
|
TimingAnalyser tmg;
|
||||||
};
|
};
|
||||||
|
|
||||||
Placer1Cfg::Placer1Cfg(Context *ctx)
|
Placer1Cfg::Placer1Cfg(Context *ctx)
|
||||||
|
@ -139,9 +139,12 @@ template <typename T> struct EquationSystem
|
|||||||
class HeAPPlacer
|
class HeAPPlacer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) : ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1)
|
HeAPPlacer(Context *ctx, PlacerHeapCfg cfg)
|
||||||
|
: ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1), tmg(ctx)
|
||||||
{
|
{
|
||||||
Eigen::initParallel();
|
Eigen::initParallel();
|
||||||
|
tmg.setup_only = true;
|
||||||
|
tmg.setup();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool place()
|
bool place()
|
||||||
@ -269,7 +272,7 @@ class HeAPPlacer
|
|||||||
|
|
||||||
// Update timing weights
|
// Update timing weights
|
||||||
if (cfg.timing_driven)
|
if (cfg.timing_driven)
|
||||||
get_criticalities(ctx, &net_crit);
|
tmg.run();
|
||||||
|
|
||||||
if (legal_hpwl < best_hpwl) {
|
if (legal_hpwl < best_hpwl) {
|
||||||
best_hpwl = legal_hpwl;
|
best_hpwl = legal_hpwl;
|
||||||
@ -355,6 +358,8 @@ class HeAPPlacer
|
|||||||
FastBels fast_bels;
|
FastBels fast_bels;
|
||||||
std::unordered_map<IdString, std::tuple<int, int>> bel_types;
|
std::unordered_map<IdString, std::tuple<int, int>> bel_types;
|
||||||
|
|
||||||
|
TimingAnalyser tmg;
|
||||||
|
|
||||||
struct BoundingBox
|
struct BoundingBox
|
||||||
{
|
{
|
||||||
// Actual bounding box
|
// Actual bounding box
|
||||||
@ -392,8 +397,6 @@ class HeAPPlacer
|
|||||||
// Performance counting
|
// Performance counting
|
||||||
double solve_time = 0, cl_time = 0, sl_time = 0;
|
double solve_time = 0, cl_time = 0, sl_time = 0;
|
||||||
|
|
||||||
NetCriticalityMap net_crit;
|
|
||||||
|
|
||||||
// Place cells with the BEL attribute set to constrain them
|
// Place cells with the BEL attribute set to constrain them
|
||||||
void place_constraints()
|
void place_constraints()
|
||||||
{
|
{
|
||||||
@ -736,11 +739,9 @@ class HeAPPlacer
|
|||||||
std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) *
|
std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) *
|
||||||
std::abs(o_pos - this_pos)));
|
std::abs(o_pos - this_pos)));
|
||||||
|
|
||||||
if (user_idx != -1 && net_crit.count(ni->name)) {
|
if (user_idx != -1) {
|
||||||
auto &nc = net_crit.at(ni->name);
|
weight *= (1.0 + cfg.timingWeight * std::pow(tmg.get_criticality(CellPortKey(port)),
|
||||||
if (user_idx < int(nc.criticality.size()))
|
cfg.criticalityExponent));
|
||||||
weight *= (1.0 + cfg.timingWeight *
|
|
||||||
std::pow(nc.criticality.at(user_idx), cfg.criticalityExponent));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation,
|
// If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation,
|
||||||
|
@ -112,16 +112,14 @@ struct Router2
|
|||||||
Context *ctx;
|
Context *ctx;
|
||||||
Router2Cfg cfg;
|
Router2Cfg cfg;
|
||||||
|
|
||||||
Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg) {}
|
Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg), tmg(ctx) { tmg.setup(); }
|
||||||
|
|
||||||
// Use 'udata' for fast net lookups and indexing
|
// Use 'udata' for fast net lookups and indexing
|
||||||
std::vector<NetInfo *> nets_by_udata;
|
std::vector<NetInfo *> nets_by_udata;
|
||||||
std::vector<PerNetData> nets;
|
std::vector<PerNetData> nets;
|
||||||
|
|
||||||
bool timing_driven;
|
bool timing_driven;
|
||||||
|
TimingAnalyser tmg;
|
||||||
// Criticality data from timing analysis
|
|
||||||
NetCriticalityMap net_crit;
|
|
||||||
|
|
||||||
void setup_nets()
|
void setup_nets()
|
||||||
{
|
{
|
||||||
@ -1175,18 +1173,13 @@ struct Router2
|
|||||||
if (timing_driven && (int(route_queue.size()) > (int(nets_by_udata.size()) / 50))) {
|
if (timing_driven && (int(route_queue.size()) > (int(nets_by_udata.size()) / 50))) {
|
||||||
// Heuristic: reduce runtime by skipping STA in the case of a "long tail" of a few
|
// Heuristic: reduce runtime by skipping STA in the case of a "long tail" of a few
|
||||||
// congested nodes
|
// congested nodes
|
||||||
get_criticalities(ctx, &net_crit);
|
tmg.run();
|
||||||
for (auto n : route_queue) {
|
for (auto n : route_queue) {
|
||||||
IdString name = nets_by_udata.at(n)->name;
|
NetInfo *ni = nets_by_udata.at(n);
|
||||||
auto fnd = net_crit.find(name);
|
|
||||||
auto &net = nets.at(n);
|
auto &net = nets.at(n);
|
||||||
net.max_crit = 0;
|
net.max_crit = 0;
|
||||||
if (fnd == net_crit.end())
|
for (auto &usr : ni->users) {
|
||||||
continue;
|
float c = tmg.get_criticality(CellPortKey(usr));
|
||||||
for (int i = 0; i < int(fnd->second.criticality.size()); i++) {
|
|
||||||
float c = fnd->second.criticality.at(i);
|
|
||||||
for (auto &a : net.arcs.at(i))
|
|
||||||
a.arc_crit = c;
|
|
||||||
net.max_crit = std::max(net.max_crit, c);
|
net.max_crit = std::max(net.max_crit, c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
706
common/timing.cc
706
common/timing.cc
@ -30,6 +30,547 @@
|
|||||||
|
|
||||||
NEXTPNR_NAMESPACE_BEGIN
|
NEXTPNR_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
void TimingAnalyser::setup()
|
||||||
|
{
|
||||||
|
init_ports();
|
||||||
|
get_cell_delays();
|
||||||
|
topo_sort();
|
||||||
|
setup_port_domains();
|
||||||
|
run();
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::run()
|
||||||
|
{
|
||||||
|
reset_times();
|
||||||
|
get_route_delays();
|
||||||
|
walk_forward();
|
||||||
|
walk_backward();
|
||||||
|
compute_slack();
|
||||||
|
compute_criticality();
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::init_ports()
|
||||||
|
{
|
||||||
|
// Per cell port structures
|
||||||
|
for (auto cell : sorted(ctx->cells)) {
|
||||||
|
CellInfo *ci = cell.second;
|
||||||
|
for (auto port : sorted_ref(ci->ports)) {
|
||||||
|
auto &data = ports[CellPortKey(ci->name, port.first)];
|
||||||
|
data.type = port.second.type;
|
||||||
|
data.cell_port = CellPortKey(ci->name, port.first);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Cell port to net port mapping
|
||||||
|
for (auto net : sorted(ctx->nets)) {
|
||||||
|
NetInfo *ni = net.second;
|
||||||
|
if (ni->driver.cell != nullptr)
|
||||||
|
ports[CellPortKey(ni->driver)].net_port = NetPortKey(ni->name);
|
||||||
|
for (size_t i = 0; i < ni->users.size(); i++)
|
||||||
|
ports[CellPortKey(ni->users.at(i))].net_port = NetPortKey(ni->name, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::get_cell_delays()
|
||||||
|
{
|
||||||
|
for (auto &port : ports) {
|
||||||
|
CellInfo *ci = cell_info(port.first);
|
||||||
|
auto &pi = port_info(port.first);
|
||||||
|
auto &pd = port.second;
|
||||||
|
|
||||||
|
IdString name = port.first.port;
|
||||||
|
// Ignore dangling ports altogether for timing purposes
|
||||||
|
if (pd.net_port.net == IdString())
|
||||||
|
continue;
|
||||||
|
pd.cell_arcs.clear();
|
||||||
|
int clkInfoCount = 0;
|
||||||
|
TimingPortClass cls = ctx->getPortTimingClass(ci, name, clkInfoCount);
|
||||||
|
if (cls == TMG_STARTPOINT || cls == TMG_ENDPOINT || cls == TMG_CLOCK_INPUT || cls == TMG_GEN_CLOCK ||
|
||||||
|
cls == TMG_IGNORE)
|
||||||
|
continue;
|
||||||
|
if (pi.type == PORT_IN) {
|
||||||
|
// Input ports might have setup/hold relationships
|
||||||
|
if (cls == TMG_REGISTER_INPUT) {
|
||||||
|
for (int i = 0; i < clkInfoCount; i++) {
|
||||||
|
auto info = ctx->getPortClockingInfo(ci, name, i);
|
||||||
|
if (!ci->ports.count(info.clock_port) || ci->ports.at(info.clock_port).net == nullptr)
|
||||||
|
continue;
|
||||||
|
pd.cell_arcs.emplace_back(CellArc::SETUP, info.clock_port, DelayQuad(info.setup, info.setup),
|
||||||
|
info.edge);
|
||||||
|
pd.cell_arcs.emplace_back(CellArc::HOLD, info.clock_port, DelayQuad(info.hold, info.hold),
|
||||||
|
info.edge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Combinational delays through cell
|
||||||
|
for (auto &other_port : ci->ports) {
|
||||||
|
auto &op = other_port.second;
|
||||||
|
// ignore dangling ports and non-outputs
|
||||||
|
if (op.net == nullptr || op.type != PORT_OUT)
|
||||||
|
continue;
|
||||||
|
DelayQuad delay;
|
||||||
|
bool is_path = ctx->getCellDelay(ci, name, other_port.first, delay);
|
||||||
|
if (is_path)
|
||||||
|
pd.cell_arcs.emplace_back(CellArc::COMBINATIONAL, other_port.first, delay);
|
||||||
|
}
|
||||||
|
} else if (pi.type == PORT_OUT) {
|
||||||
|
// Output ports might have clk-to-q relationships
|
||||||
|
if (cls == TMG_REGISTER_OUTPUT) {
|
||||||
|
for (int i = 0; i < clkInfoCount; i++) {
|
||||||
|
auto info = ctx->getPortClockingInfo(ci, name, i);
|
||||||
|
if (!ci->ports.count(info.clock_port) || ci->ports.at(info.clock_port).net == nullptr)
|
||||||
|
continue;
|
||||||
|
pd.cell_arcs.emplace_back(CellArc::CLK_TO_Q, info.clock_port, info.clockToQ, info.edge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Combinational delays through cell
|
||||||
|
for (auto &other_port : ci->ports) {
|
||||||
|
auto &op = other_port.second;
|
||||||
|
// ignore dangling ports and non-inputs
|
||||||
|
if (op.net == nullptr || op.type != PORT_IN)
|
||||||
|
continue;
|
||||||
|
DelayQuad delay;
|
||||||
|
bool is_path = ctx->getCellDelay(ci, other_port.first, name, delay);
|
||||||
|
if (is_path)
|
||||||
|
pd.cell_arcs.emplace_back(CellArc::COMBINATIONAL, other_port.first, delay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::get_route_delays()
|
||||||
|
{
|
||||||
|
for (auto net : sorted(ctx->nets)) {
|
||||||
|
NetInfo *ni = net.second;
|
||||||
|
if (ni->driver.cell == nullptr || ni->driver.cell->bel == BelId())
|
||||||
|
continue;
|
||||||
|
for (auto &usr : ni->users) {
|
||||||
|
if (usr.cell->bel == BelId())
|
||||||
|
continue;
|
||||||
|
ports.at(CellPortKey(usr)).route_delay = DelayPair(ctx->getNetinfoRouteDelay(ni, usr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::topo_sort()
|
||||||
|
{
|
||||||
|
TopoSort<CellPortKey> topo;
|
||||||
|
for (auto &port : ports) {
|
||||||
|
auto &pd = port.second;
|
||||||
|
// All ports are nodes
|
||||||
|
topo.node(port.first);
|
||||||
|
if (pd.type == PORT_IN) {
|
||||||
|
// inputs: combinational arcs through the cell are edges
|
||||||
|
for (auto &arc : pd.cell_arcs) {
|
||||||
|
if (arc.type != CellArc::COMBINATIONAL)
|
||||||
|
continue;
|
||||||
|
topo.edge(port.first, CellPortKey(port.first.cell, arc.other_port));
|
||||||
|
}
|
||||||
|
} else if (pd.type == PORT_OUT) {
|
||||||
|
// output: routing arcs are edges
|
||||||
|
const NetInfo *pn = port_info(port.first).net;
|
||||||
|
if (pn != nullptr) {
|
||||||
|
for (auto &usr : pn->users)
|
||||||
|
topo.edge(port.first, CellPortKey(usr));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool no_loops = topo.sort();
|
||||||
|
if (!no_loops && verbose_mode) {
|
||||||
|
log_info("Found %d combinational loops:\n", int(topo.loops.size()));
|
||||||
|
int i = 0;
|
||||||
|
for (auto &loop : topo.loops) {
|
||||||
|
log_info(" loop %d:\n", ++i);
|
||||||
|
for (auto &port : loop) {
|
||||||
|
log_info(" %s.%s (%s)\n", ctx->nameOf(port.cell), ctx->nameOf(port.port),
|
||||||
|
ctx->nameOf(port_info(port).net));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::swap(topological_order, topo.sorted);
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::setup_port_domains()
|
||||||
|
{
|
||||||
|
for (auto &d : domains) {
|
||||||
|
d.startpoints.clear();
|
||||||
|
d.endpoints.clear();
|
||||||
|
}
|
||||||
|
// Go forward through the topological order (domains from the PoV of arrival time)
|
||||||
|
for (auto port : topological_order) {
|
||||||
|
auto &pd = ports.at(port);
|
||||||
|
auto &pi = port_info(port);
|
||||||
|
if (pi.type == PORT_OUT) {
|
||||||
|
for (auto &fanin : pd.cell_arcs) {
|
||||||
|
if (fanin.type != CellArc::CLK_TO_Q)
|
||||||
|
continue;
|
||||||
|
// registered outputs are startpoints
|
||||||
|
auto dom = domain_id(port.cell, fanin.other_port, fanin.edge);
|
||||||
|
// create per-domain data
|
||||||
|
pd.arrival[dom];
|
||||||
|
domains.at(dom).startpoints.emplace_back(port, fanin.other_port);
|
||||||
|
}
|
||||||
|
// copy domains across routing
|
||||||
|
if (pi.net != nullptr)
|
||||||
|
for (auto &usr : pi.net->users)
|
||||||
|
copy_domains(port, CellPortKey(usr), false);
|
||||||
|
} else {
|
||||||
|
// copy domains from input to output
|
||||||
|
for (auto &fanout : pd.cell_arcs) {
|
||||||
|
if (fanout.type != CellArc::COMBINATIONAL)
|
||||||
|
continue;
|
||||||
|
copy_domains(port, CellPortKey(port.cell, fanout.other_port), false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Go backward through the topological order (domains from the PoV of required time)
|
||||||
|
for (auto port : reversed_range(topological_order)) {
|
||||||
|
auto &pd = ports.at(port);
|
||||||
|
auto &pi = port_info(port);
|
||||||
|
if (pi.type == PORT_OUT) {
|
||||||
|
// copy domains from output to input
|
||||||
|
for (auto &fanin : pd.cell_arcs) {
|
||||||
|
if (fanin.type != CellArc::COMBINATIONAL)
|
||||||
|
continue;
|
||||||
|
copy_domains(port, CellPortKey(port.cell, fanin.other_port), true);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (auto &fanout : pd.cell_arcs) {
|
||||||
|
if (fanout.type != CellArc::SETUP)
|
||||||
|
continue;
|
||||||
|
// registered inputs are startpoints
|
||||||
|
auto dom = domain_id(port.cell, fanout.other_port, fanout.edge);
|
||||||
|
// create per-domain data
|
||||||
|
pd.required[dom];
|
||||||
|
domains.at(dom).endpoints.emplace_back(port, fanout.other_port);
|
||||||
|
}
|
||||||
|
// copy port to driver
|
||||||
|
if (pi.net != nullptr && pi.net->driver.cell != nullptr)
|
||||||
|
copy_domains(port, CellPortKey(pi.net->driver), true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Iterate over ports and find domain paris
|
||||||
|
for (auto port : topological_order) {
|
||||||
|
auto &pd = ports.at(port);
|
||||||
|
for (auto &arr : pd.arrival)
|
||||||
|
for (auto &req : pd.required) {
|
||||||
|
pd.domain_pairs[domain_pair_id(arr.first, req.first)];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::reset_times()
|
||||||
|
{
|
||||||
|
for (auto &port : ports) {
|
||||||
|
auto do_reset = [&](std::unordered_map<domain_id_t, ArrivReqTime> ×) {
|
||||||
|
for (auto &t : times) {
|
||||||
|
t.second.value = init_delay;
|
||||||
|
t.second.path_length = 0;
|
||||||
|
t.second.bwd_min = CellPortKey();
|
||||||
|
t.second.bwd_max = CellPortKey();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
do_reset(port.second.arrival);
|
||||||
|
do_reset(port.second.required);
|
||||||
|
for (auto &dp : port.second.domain_pairs) {
|
||||||
|
dp.second.setup_slack = std::numeric_limits<delay_t>::max();
|
||||||
|
dp.second.hold_slack = std::numeric_limits<delay_t>::max();
|
||||||
|
dp.second.max_path_length = 0;
|
||||||
|
dp.second.criticality = 0;
|
||||||
|
dp.second.budget = 0;
|
||||||
|
}
|
||||||
|
port.second.worst_crit = 0;
|
||||||
|
port.second.worst_setup_slack = std::numeric_limits<delay_t>::max();
|
||||||
|
port.second.worst_hold_slack = std::numeric_limits<delay_t>::max();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merges a candidate arrival time into the entry for `domain` on `target`.
// The maximum delay only ever grows and the minimum only ever shrinks (the
// latter is skipped in setup-only mode); `prev` is remembered as the
// predecessor for whichever bound was updated.
void TimingAnalyser::set_arrival_time(CellPortKey target, domain_id_t domain, DelayPair arrival, int path_length,
                                      CellPortKey prev)
{
    auto &entry = ports.at(target).arrival.at(domain);

    // Latest arrival
    if (entry.value.max_delay < arrival.max_delay) {
        entry.value.max_delay = arrival.max_delay;
        entry.bwd_max = prev;
    }
    // Earliest arrival
    if (!setup_only) {
        if (entry.value.min_delay > arrival.min_delay) {
            entry.value.min_delay = arrival.min_delay;
            entry.bwd_min = prev;
        }
    }
    // Keep the longest path length seen so far
    if (path_length > entry.path_length)
        entry.path_length = path_length;
}
|
||||||
|
|
||||||
|
// Merges a candidate required time into the entry for `domain` on `target`.
// The minimum only ever shrinks and the maximum only ever grows (the latter is
// skipped in setup-only mode); `prev` is remembered as the predecessor for
// whichever bound was updated.
void TimingAnalyser::set_required_time(CellPortKey target, domain_id_t domain, DelayPair required, int path_length,
                                       CellPortKey prev)
{
    auto &entry = ports.at(target).required.at(domain);

    // Lower bound of the required time
    if (entry.value.min_delay > required.min_delay) {
        entry.value.min_delay = required.min_delay;
        entry.bwd_min = prev;
    }
    // Upper bound of the required time
    if (!setup_only) {
        if (entry.value.max_delay < required.max_delay) {
            entry.value.max_delay = required.max_delay;
            entry.bwd_max = prev;
        }
    }
    // Keep the longest path length seen so far
    if (path_length > entry.path_length)
        entry.path_length = path_length;
}
|
||||||
|
|
||||||
|
void TimingAnalyser::walk_forward()
|
||||||
|
{
|
||||||
|
// Assign initial arrival time to domain startpoints
|
||||||
|
for (domain_id_t dom_id = 0; dom_id < domain_id_t(domains.size()); ++dom_id) {
|
||||||
|
auto &dom = domains.at(dom_id);
|
||||||
|
for (auto &sp : dom.startpoints) {
|
||||||
|
auto &pd = ports.at(sp.first);
|
||||||
|
DelayPair init_arrival(0);
|
||||||
|
CellPortKey clock_key;
|
||||||
|
// TODO: clock routing delay, if analysis of that is enabled
|
||||||
|
if (sp.second != IdString()) {
|
||||||
|
// clocked startpoints have a clock-to-out time
|
||||||
|
for (auto &fanin : pd.cell_arcs) {
|
||||||
|
if (fanin.type == CellArc::CLK_TO_Q && fanin.other_port == sp.second) {
|
||||||
|
init_arrival = init_arrival + fanin.value.delayPair();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clock_key = CellPortKey(sp.first.cell, sp.second);
|
||||||
|
}
|
||||||
|
set_arrival_time(sp.first, dom_id, init_arrival, 1, clock_key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Walk forward in topological order
|
||||||
|
for (auto p : topological_order) {
|
||||||
|
auto &pd = ports.at(p);
|
||||||
|
for (auto &arr : pd.arrival) {
|
||||||
|
if (pd.type == PORT_OUT) {
|
||||||
|
// Output port: propagate delay through net, adding route delay
|
||||||
|
NetInfo *net = port_info(p).net;
|
||||||
|
if (net != nullptr)
|
||||||
|
for (auto &usr : net->users) {
|
||||||
|
CellPortKey usr_key(usr);
|
||||||
|
auto &usr_pd = ports.at(usr_key);
|
||||||
|
set_arrival_time(usr_key, arr.first, arr.second.value + usr_pd.route_delay,
|
||||||
|
arr.second.path_length, p);
|
||||||
|
}
|
||||||
|
} else if (pd.type == PORT_IN) {
|
||||||
|
// Input port; propagate delay through cell, adding combinational delay
|
||||||
|
for (auto &fanout : pd.cell_arcs) {
|
||||||
|
if (fanout.type != CellArc::COMBINATIONAL)
|
||||||
|
continue;
|
||||||
|
set_arrival_time(CellPortKey(p.cell, fanout.other_port), arr.first,
|
||||||
|
arr.second.value + fanout.value.delayPair(), arr.second.path_length + 1, p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::walk_backward()
|
||||||
|
{
|
||||||
|
// Assign initial required time to domain endpoints
|
||||||
|
// Note that clock frequency will be considered later in the analysis for, for now all required times are normalised
|
||||||
|
// to 0ns
|
||||||
|
for (domain_id_t dom_id = 0; dom_id < domain_id_t(domains.size()); ++dom_id) {
|
||||||
|
auto &dom = domains.at(dom_id);
|
||||||
|
for (auto &ep : dom.endpoints) {
|
||||||
|
auto &pd = ports.at(ep.first);
|
||||||
|
DelayPair init_setuphold(0);
|
||||||
|
CellPortKey clock_key;
|
||||||
|
// TODO: clock routing delay, if analysis of that is enabled
|
||||||
|
if (ep.second != IdString()) {
|
||||||
|
// Add setup/hold time, if this endpoint is clocked
|
||||||
|
for (auto &fanin : pd.cell_arcs) {
|
||||||
|
if (fanin.type == CellArc::SETUP && fanin.other_port == ep.second)
|
||||||
|
init_setuphold.min_delay -= fanin.value.maxDelay();
|
||||||
|
if (fanin.type == CellArc::HOLD && fanin.other_port == ep.second)
|
||||||
|
init_setuphold.max_delay -= fanin.value.maxDelay();
|
||||||
|
}
|
||||||
|
clock_key = CellPortKey(ep.first.cell, ep.second);
|
||||||
|
}
|
||||||
|
set_required_time(ep.first, dom_id, init_setuphold, 1, clock_key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Walk backwards in topological order
|
||||||
|
for (auto p : reversed_range(topological_order)) {
|
||||||
|
auto &pd = ports.at(p);
|
||||||
|
for (auto &req : pd.required) {
|
||||||
|
if (pd.type == PORT_IN) {
|
||||||
|
// Input port: propagate delay back through net, subtracting route delay
|
||||||
|
NetInfo *net = port_info(p).net;
|
||||||
|
if (net != nullptr && net->driver.cell != nullptr)
|
||||||
|
set_required_time(CellPortKey(net->driver), req.first, req.second.value - pd.route_delay,
|
||||||
|
req.second.path_length, p);
|
||||||
|
} else if (pd.type == PORT_OUT) {
|
||||||
|
// Output port : propagate delay back through cell, subtracting combinational delay
|
||||||
|
for (auto &fanin : pd.cell_arcs) {
|
||||||
|
if (fanin.type != CellArc::COMBINATIONAL)
|
||||||
|
continue;
|
||||||
|
set_required_time(CellPortKey(p.cell, fanin.other_port), req.first,
|
||||||
|
req.second.value - fanin.value.delayPair(), req.second.path_length + 1, p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::print_fmax()
|
||||||
|
{
|
||||||
|
// Temporary testing code for comparison only
|
||||||
|
std::unordered_map<int, double> domain_fmax;
|
||||||
|
for (auto p : topological_order) {
|
||||||
|
auto &pd = ports.at(p);
|
||||||
|
for (auto &req : pd.required) {
|
||||||
|
if (pd.arrival.count(req.first)) {
|
||||||
|
auto &arr = pd.arrival.at(req.first);
|
||||||
|
double fmax = 1000.0 / ctx->getDelayNS(arr.value.maxDelay() - req.second.value.minDelay());
|
||||||
|
if (!domain_fmax.count(req.first) || domain_fmax.at(req.first) > fmax)
|
||||||
|
domain_fmax[req.first] = fmax;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto &fm : domain_fmax) {
|
||||||
|
log_info("Domain %s Worst Fmax %.02f\n", ctx->nameOf(domains.at(fm.first).key.clock), fm.second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::compute_slack()
|
||||||
|
{
|
||||||
|
for (auto &dp : domain_pairs) {
|
||||||
|
dp.worst_setup_slack = std::numeric_limits<delay_t>::max();
|
||||||
|
dp.worst_hold_slack = std::numeric_limits<delay_t>::max();
|
||||||
|
}
|
||||||
|
for (auto p : topological_order) {
|
||||||
|
auto &pd = ports.at(p);
|
||||||
|
for (auto &pdp : pd.domain_pairs) {
|
||||||
|
auto &dp = domain_pairs.at(pdp.first);
|
||||||
|
auto &arr = pd.arrival.at(dp.key.launch);
|
||||||
|
auto &req = pd.required.at(dp.key.capture);
|
||||||
|
pdp.second.setup_slack = dp.period.minDelay() - (arr.value.maxDelay() - req.value.minDelay());
|
||||||
|
if (!setup_only)
|
||||||
|
pdp.second.hold_slack = arr.value.minDelay() - req.value.maxDelay();
|
||||||
|
pdp.second.max_path_length = arr.path_length + req.path_length;
|
||||||
|
pd.worst_setup_slack = std::min(pd.worst_setup_slack, pdp.second.setup_slack);
|
||||||
|
dp.worst_setup_slack = std::min(dp.worst_setup_slack, pdp.second.setup_slack);
|
||||||
|
if (!setup_only) {
|
||||||
|
pd.worst_hold_slack = std::min(pd.worst_hold_slack, pdp.second.hold_slack);
|
||||||
|
dp.worst_hold_slack = std::min(dp.worst_hold_slack, pdp.second.hold_slack);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void TimingAnalyser::compute_criticality()
|
||||||
|
{
|
||||||
|
for (auto p : topological_order) {
|
||||||
|
auto &pd = ports.at(p);
|
||||||
|
for (auto &pdp : pd.domain_pairs) {
|
||||||
|
auto &dp = domain_pairs.at(pdp.first);
|
||||||
|
float crit =
|
||||||
|
1.0f - (float(pdp.second.setup_slack) - float(dp.worst_setup_slack)) / float(-dp.worst_setup_slack);
|
||||||
|
crit = std::min(crit, 1.0f);
|
||||||
|
crit = std::max(crit, 0.0f);
|
||||||
|
pdp.second.criticality = crit;
|
||||||
|
pd.worst_crit = std::max(pd.worst_crit, crit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns up to `count` endpoints of the capture domain of `domain_pair`,
// ordered by increasing setup slack (worst first).
//
// Each round selects the endpoint with the smallest slack that is strictly
// greater than the slack selected in the previous round, so at most one
// endpoint is reported per distinct slack value. Endpoints are not filtered
// on the sign of their slack.
std::vector<CellPortKey> TimingAnalyser::get_failing_eps(domain_id_t domain_pair, int count)
{
    std::vector<CellPortKey> failing_eps;
    // lowest() rather than min(): for a floating-point delay_t, min() is the
    // smallest positive value and would exclude every negative slack. For
    // integer types the two are identical.
    delay_t last_slack = std::numeric_limits<delay_t>::lowest();
    auto &dp = domain_pairs.at(domain_pair);
    auto &cap_d = domains.at(dp.key.capture);
    while (int(failing_eps.size()) < count) {
        CellPortKey next;
        delay_t next_slack = std::numeric_limits<delay_t>::max();
        for (const auto &ep : cap_d.endpoints) {
            auto &pd = ports.at(ep.first);
            auto found = pd.domain_pairs.find(domain_pair);
            // Endpoint is not reached from this pair's launch domain
            if (found == pd.domain_pairs.end())
                continue;
            delay_t ep_slack = found->second.setup_slack;
            if (ep_slack < next_slack && ep_slack > last_slack) {
                next = ep.first;
                next_slack = ep_slack;
            }
        }
        // A default-constructed key means no further candidate was found
        if (next == CellPortKey())
            break;
        failing_eps.push_back(next);
        last_slack = next_slack;
    }
    return failing_eps;
}
|
||||||
|
|
||||||
|
// Logs the path that ends at `endpoint` for the given domain pair: first the
// endpoint with its setup slack, then each port reached by following the
// latest-arrival predecessor links of the launch domain, until a port has no
// arrival entry for that domain or the predecessor is empty.
void TimingAnalyser::print_critical_path(CellPortKey endpoint, domain_id_t domain_pair)
{
    auto &dp = domain_pairs.at(domain_pair);
    CellPortKey cursor = endpoint;
    log(" endpoint %s.%s (slack %.02fns):\n", ctx->nameOf(cursor.cell), ctx->nameOf(cursor.port),
        ctx->getDelayNS(ports.at(cursor).domain_pairs.at(domain_pair).setup_slack));

    while (cursor != CellPortKey()) {
        auto net = get_net_or_empty(ctx->cells.at(cursor.cell).get(), cursor.port);
        log(" %s.%s (net %s)\n", ctx->nameOf(cursor.cell), ctx->nameOf(cursor.port), ctx->nameOf(net));

        // Stop once the launch domain is no longer present on this port
        auto &arrivals = ports.at(cursor).arrival;
        auto launch = arrivals.find(dp.key.launch);
        if (launch == arrivals.end())
            break;
        cursor = launch->second.bwd_max;
    }
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
const char *edge_name(ClockEdge edge) { return (edge == FALLING_EDGE) ? "negedge" : "posedge"; }
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
void TimingAnalyser::print_report()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < int(domain_pairs.size()); i++) {
|
||||||
|
auto &dp = domain_pairs.at(i);
|
||||||
|
auto &launch = domains.at(dp.key.launch);
|
||||||
|
auto &capture = domains.at(dp.key.capture);
|
||||||
|
log("Worst endpoints for %s %s -> %s %s\n", edge_name(launch.key.edge), ctx->nameOf(launch.key.clock),
|
||||||
|
edge_name(capture.key.edge), ctx->nameOf(capture.key.clock));
|
||||||
|
auto failing_eps = get_failing_eps(i, 5);
|
||||||
|
for (auto &ep : failing_eps)
|
||||||
|
print_critical_path(ep, i);
|
||||||
|
log_break();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenience overload: resolves the net attached to `clock_port` of `cell`
// and returns the id of the domain for that net and edge.
domain_id_t TimingAnalyser::domain_id(IdString cell, IdString clock_port, ClockEdge edge)
{
    const NetInfo *clock_net = ctx->cells.at(cell)->ports.at(clock_port).net;
    return domain_id(clock_net, edge);
}
|
||||||
|
// Returns the id of the clock domain identified by the net's name and the
// edge, registering a new domain when this combination has not been seen
// before. The net must not be null.
domain_id_t TimingAnalyser::domain_id(const NetInfo *net, ClockEdge edge)
{
    NPNR_ASSERT(net != nullptr);
    ClockDomainKey key{net->name, edge};
    auto known = domain_to_id.find(key);
    if (known == domain_to_id.end()) {
        // New domain: its id is its index in `domains`
        known = domain_to_id.emplace(key, domains.size()).first;
        domains.emplace_back(key);
    }
    return known->second;
}
|
||||||
|
// Returns the id of the (launch, capture) domain pair, registering a new pair
// when this combination has not been seen before.
domain_id_t TimingAnalyser::domain_pair_id(domain_id_t launch, domain_id_t capture)
{
    ClockDomainPairKey key{launch, capture};
    auto known = pair_to_id.find(key);
    if (known == pair_to_id.end()) {
        // New pair: its id is its index in `domain_pairs`
        known = pair_to_id.emplace(key, domain_pairs.size()).first;
        domain_pairs.emplace_back(key);
    }
    return known->second;
}
|
||||||
|
|
||||||
|
void TimingAnalyser::copy_domains(const CellPortKey &from, const CellPortKey &to, bool backward)
|
||||||
|
{
|
||||||
|
auto &f = ports.at(from), &t = ports.at(to);
|
||||||
|
for (auto &dom : (backward ? f.required : f.arrival))
|
||||||
|
(backward ? t.required : t.arrival)[dom.first];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look up the cell named `key.cell` in the context and return a raw,
// non-owning pointer to it.
CellInfo *TimingAnalyser::cell_info(const CellPortKey &key)
{
    auto &cell_entry = ctx->cells.at(key.cell);
    return cell_entry.get();
}
|
||||||
|
|
||||||
|
// Look up port `key.port` on the cell named `key.cell` and return a reference
// to its PortInfo.
PortInfo &TimingAnalyser::port_info(const CellPortKey &key)
{
    auto &cell_ports = ctx->cells.at(key.cell)->ports;
    return cell_ports.at(key.port);
}
|
||||||
|
|
||||||
|
/** LEGACY CODE BEGIN **/
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct ClockEvent
|
struct ClockEvent
|
||||||
{
|
{
|
||||||
@ -86,7 +627,6 @@ struct CriticalPath
|
|||||||
};
|
};
|
||||||
|
|
||||||
typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap;
|
typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap;
|
||||||
typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;
|
|
||||||
|
|
||||||
struct Timing
|
struct Timing
|
||||||
{
|
{
|
||||||
@ -96,7 +636,6 @@ struct Timing
|
|||||||
delay_t min_slack;
|
delay_t min_slack;
|
||||||
CriticalPathMap *crit_path;
|
CriticalPathMap *crit_path;
|
||||||
DelayFrequency *slack_histogram;
|
DelayFrequency *slack_histogram;
|
||||||
NetCriticalityMap *net_crit;
|
|
||||||
IdString async_clock;
|
IdString async_clock;
|
||||||
|
|
||||||
struct TimingData
|
struct TimingData
|
||||||
@ -112,10 +651,9 @@ struct Timing
|
|||||||
};
|
};
|
||||||
|
|
||||||
Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr,
|
Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr,
|
||||||
DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr)
|
DelayFrequency *slack_histogram = nullptr)
|
||||||
: ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->setting<float>("target_freq")),
|
: ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->setting<float>("target_freq")),
|
||||||
crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit),
|
crit_path(crit_path), slack_histogram(slack_histogram), async_clock(ctx->id("$async$"))
|
||||||
async_clock(ctx->id("$async$"))
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -496,156 +1034,6 @@ struct Timing
|
|||||||
std::reverse(cp_ports.begin(), cp_ports.end());
|
std::reverse(cp_ports.begin(), cp_ports.end());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (net_crit) {
|
|
||||||
NPNR_ASSERT(crit_path);
|
|
||||||
// Go through in reverse topological order to set required times
|
|
||||||
for (auto net : boost::adaptors::reverse(topological_order)) {
|
|
||||||
if (!net_data.count(net))
|
|
||||||
continue;
|
|
||||||
auto &nd_map = net_data.at(net);
|
|
||||||
for (auto &startdomain : nd_map) {
|
|
||||||
auto &nd = startdomain.second;
|
|
||||||
if (nd.false_startpoint)
|
|
||||||
continue;
|
|
||||||
if (startdomain.first.clock == async_clock)
|
|
||||||
continue;
|
|
||||||
if (nd.min_required.empty())
|
|
||||||
nd.min_required.resize(net->users.size(), std::numeric_limits<delay_t>::max());
|
|
||||||
delay_t net_min_required = std::numeric_limits<delay_t>::max();
|
|
||||||
for (size_t i = 0; i < net->users.size(); i++) {
|
|
||||||
auto &usr = net->users.at(i);
|
|
||||||
auto net_delay = ctx->getNetinfoRouteDelay(net, usr);
|
|
||||||
int port_clocks;
|
|
||||||
TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks);
|
|
||||||
if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) {
|
|
||||||
auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) {
|
|
||||||
delay_t period;
|
|
||||||
// Set default period
|
|
||||||
if (edge == startdomain.first.edge) {
|
|
||||||
period = clk_period;
|
|
||||||
} else {
|
|
||||||
period = clk_period / 2;
|
|
||||||
}
|
|
||||||
if (clksig != async_clock) {
|
|
||||||
if (ctx->nets.at(clksig)->clkconstr) {
|
|
||||||
if (edge == startdomain.first.edge) {
|
|
||||||
// same edge
|
|
||||||
period = ctx->nets.at(clksig)->clkconstr->period.minDelay();
|
|
||||||
} else if (edge == RISING_EDGE) {
|
|
||||||
// falling -> rising
|
|
||||||
period = ctx->nets.at(clksig)->clkconstr->low.minDelay();
|
|
||||||
} else if (edge == FALLING_EDGE) {
|
|
||||||
// rising -> falling
|
|
||||||
period = ctx->nets.at(clksig)->clkconstr->high.minDelay();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i));
|
|
||||||
};
|
|
||||||
if (portClass == TMG_REGISTER_INPUT) {
|
|
||||||
for (int j = 0; j < port_clocks; j++) {
|
|
||||||
TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j);
|
|
||||||
const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port);
|
|
||||||
IdString clksig = clknet ? clknet->name : async_clock;
|
|
||||||
process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE,
|
|
||||||
clkInfo.setup.maxDelay());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
process_endpoint(async_clock, RISING_EDGE, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay);
|
|
||||||
}
|
|
||||||
PortRef &drv = net->driver;
|
|
||||||
if (drv.cell == nullptr)
|
|
||||||
continue;
|
|
||||||
for (const auto &port : drv.cell->ports) {
|
|
||||||
if (port.second.type != PORT_IN || !port.second.net)
|
|
||||||
continue;
|
|
||||||
DelayQuad comb_delay;
|
|
||||||
bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay);
|
|
||||||
if (!is_path)
|
|
||||||
continue;
|
|
||||||
int cc;
|
|
||||||
auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc);
|
|
||||||
if (pclass != TMG_COMB_INPUT)
|
|
||||||
continue;
|
|
||||||
NetInfo *sink_net = port.second.net;
|
|
||||||
if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) {
|
|
||||||
auto &sink_nd = net_data.at(sink_net).at(startdomain.first);
|
|
||||||
if (sink_nd.min_required.empty())
|
|
||||||
sink_nd.min_required.resize(sink_net->users.size(),
|
|
||||||
std::numeric_limits<delay_t>::max());
|
|
||||||
for (size_t i = 0; i < sink_net->users.size(); i++) {
|
|
||||||
auto &user = sink_net->users.at(i);
|
|
||||||
if (user.cell == drv.cell && user.port == port.first) {
|
|
||||||
sink_nd.min_required.at(i) = std::min(sink_nd.min_required.at(i),
|
|
||||||
net_min_required - comb_delay.maxDelay());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::unordered_map<ClockEvent, delay_t> worst_slack;
|
|
||||||
|
|
||||||
// Assign slack values
|
|
||||||
for (auto &net_entry : net_data) {
|
|
||||||
const NetInfo *net = net_entry.first;
|
|
||||||
for (auto &startdomain : net_entry.second) {
|
|
||||||
auto &nd = startdomain.second;
|
|
||||||
if (startdomain.first.clock == async_clock)
|
|
||||||
continue;
|
|
||||||
if (nd.min_required.empty())
|
|
||||||
continue;
|
|
||||||
auto &nc = (*net_crit)[net->name];
|
|
||||||
if (nc.slack.empty())
|
|
||||||
nc.slack.resize(net->users.size(), std::numeric_limits<delay_t>::max());
|
|
||||||
|
|
||||||
for (size_t i = 0; i < net->users.size(); i++) {
|
|
||||||
delay_t slack = nd.min_required.at(i) -
|
|
||||||
(nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i)));
|
|
||||||
|
|
||||||
if (worst_slack.count(startdomain.first))
|
|
||||||
worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack);
|
|
||||||
else
|
|
||||||
worst_slack[startdomain.first] = slack;
|
|
||||||
nc.slack.at(i) = slack;
|
|
||||||
}
|
|
||||||
if (ctx->debug)
|
|
||||||
log_break();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Assign criticality values
|
|
||||||
for (auto &net_entry : net_data) {
|
|
||||||
const NetInfo *net = net_entry.first;
|
|
||||||
for (auto &startdomain : net_entry.second) {
|
|
||||||
if (startdomain.first.clock == async_clock)
|
|
||||||
continue;
|
|
||||||
auto &nd = startdomain.second;
|
|
||||||
if (nd.min_required.empty())
|
|
||||||
continue;
|
|
||||||
auto &nc = (*net_crit)[net->name];
|
|
||||||
if (nc.slack.empty())
|
|
||||||
continue;
|
|
||||||
if (nc.criticality.empty())
|
|
||||||
nc.criticality.resize(net->users.size(), 0);
|
|
||||||
// Only consider intra-clock paths for criticality
|
|
||||||
if (!crit_path->count(ClockPair{startdomain.first, startdomain.first}))
|
|
||||||
continue;
|
|
||||||
delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay;
|
|
||||||
for (size_t i = 0; i < net->users.size(); i++) {
|
|
||||||
float criticality =
|
|
||||||
1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax);
|
|
||||||
nc.criticality.at(i) = std::min<double>(1.0, std::max<double>(0.0, criticality));
|
|
||||||
}
|
|
||||||
nc.max_path_length = nd.max_path_length;
|
|
||||||
nc.cd_worst_slack = worst_slack.at(startdomain.first);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return min_slack;
|
return min_slack;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -999,12 +1387,4 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_criticalities(Context *ctx, NetCriticalityMap *net_crit)
|
|
||||||
{
|
|
||||||
CriticalPathMap crit_paths;
|
|
||||||
net_crit->clear();
|
|
||||||
Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit);
|
|
||||||
timing.walk_paths();
|
|
||||||
}
|
|
||||||
|
|
||||||
NEXTPNR_NAMESPACE_END
|
NEXTPNR_NAMESPACE_END
|
||||||
|
258
common/timing.h
258
common/timing.h
@ -24,6 +24,251 @@
|
|||||||
|
|
||||||
NEXTPNR_NAMESPACE_BEGIN
|
NEXTPNR_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
struct CellPortKey
|
||||||
|
{
|
||||||
|
CellPortKey(){};
|
||||||
|
CellPortKey(IdString cell, IdString port) : cell(cell), port(port){};
|
||||||
|
explicit CellPortKey(const PortRef &pr)
|
||||||
|
{
|
||||||
|
NPNR_ASSERT(pr.cell != nullptr);
|
||||||
|
cell = pr.cell->name;
|
||||||
|
port = pr.port;
|
||||||
|
}
|
||||||
|
IdString cell, port;
|
||||||
|
struct Hash
|
||||||
|
{
|
||||||
|
inline std::size_t operator()(const CellPortKey &arg) const noexcept
|
||||||
|
{
|
||||||
|
std::size_t seed = std::hash<IdString>()(arg.cell);
|
||||||
|
seed ^= std::hash<IdString>()(arg.port) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
|
||||||
|
return seed;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
inline bool operator==(const CellPortKey &other) const { return (cell == other.cell) && (port == other.port); }
|
||||||
|
inline bool operator!=(const CellPortKey &other) const { return (cell != other.cell) || (port != other.port); }
|
||||||
|
inline bool operator<(const CellPortKey &other) const
|
||||||
|
{
|
||||||
|
return cell == other.cell ? port < other.port : cell < other.cell;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct NetPortKey
|
||||||
|
{
|
||||||
|
IdString net;
|
||||||
|
size_t idx;
|
||||||
|
NetPortKey(){};
|
||||||
|
explicit NetPortKey(IdString net) : net(net), idx(DRIVER_IDX){}; // driver
|
||||||
|
explicit NetPortKey(IdString net, size_t user) : net(net), idx(user){}; // user
|
||||||
|
|
||||||
|
static const size_t DRIVER_IDX = std::numeric_limits<size_t>::max();
|
||||||
|
|
||||||
|
inline bool is_driver() const { return (idx == DRIVER_IDX); }
|
||||||
|
inline size_t user_idx() const
|
||||||
|
{
|
||||||
|
NPNR_ASSERT(idx != DRIVER_IDX);
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Hash
|
||||||
|
{
|
||||||
|
std::size_t operator()(const NetPortKey &arg) const noexcept
|
||||||
|
{
|
||||||
|
std::size_t seed = std::hash<IdString>()(arg.net);
|
||||||
|
seed ^= std::hash<size_t>()(arg.idx) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
|
||||||
|
return seed;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
inline bool operator==(const NetPortKey &other) const { return (net == other.net) && (idx == other.idx); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ClockDomainKey
|
||||||
|
{
|
||||||
|
IdString clock;
|
||||||
|
ClockEdge edge;
|
||||||
|
ClockDomainKey(IdString clock_net, ClockEdge edge) : clock(clock_net), edge(edge){};
|
||||||
|
// probably also need something here to deal with constraints
|
||||||
|
inline bool is_async() const { return clock == IdString(); }
|
||||||
|
|
||||||
|
struct Hash
|
||||||
|
{
|
||||||
|
std::size_t operator()(const ClockDomainKey &arg) const noexcept
|
||||||
|
{
|
||||||
|
std::size_t seed = std::hash<IdString>()(arg.clock);
|
||||||
|
seed ^= std::hash<int>()(int(arg.edge)) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
|
||||||
|
return seed;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
inline bool operator==(const ClockDomainKey &other) const { return (clock == other.clock) && (edge == other.edge); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Integer id type used for clock domains and for clock domain pairs.
using domain_id_t = int;
|
||||||
|
|
||||||
|
struct ClockDomainPairKey
|
||||||
|
{
|
||||||
|
domain_id_t launch, capture;
|
||||||
|
ClockDomainPairKey(domain_id_t launch, domain_id_t capture) : launch(launch), capture(capture){};
|
||||||
|
inline bool operator==(const ClockDomainPairKey &other) const
|
||||||
|
{
|
||||||
|
return (launch == other.launch) && (capture == other.capture);
|
||||||
|
}
|
||||||
|
struct Hash
|
||||||
|
{
|
||||||
|
std::size_t operator()(const ClockDomainPairKey &arg) const noexcept
|
||||||
|
{
|
||||||
|
std::size_t seed = std::hash<domain_id_t>()(arg.launch);
|
||||||
|
seed ^= std::hash<domain_id_t>()(arg.capture) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
|
||||||
|
return seed;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TimingAnalyser
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TimingAnalyser(Context *ctx) : ctx(ctx){};
|
||||||
|
void setup();
|
||||||
|
void run();
|
||||||
|
void print_report();
|
||||||
|
|
||||||
|
float get_criticality(CellPortKey port) const { return ports.at(port).worst_crit; }
|
||||||
|
float get_setup_slack(CellPortKey port) const { return ports.at(port).worst_setup_slack; }
|
||||||
|
float get_domain_setup_slack(CellPortKey port) const
|
||||||
|
{
|
||||||
|
delay_t slack = std::numeric_limits<delay_t>::max();
|
||||||
|
for (const auto &dp : ports.at(port).domain_pairs)
|
||||||
|
slack = std::min(slack, domain_pairs.at(dp.first).worst_setup_slack);
|
||||||
|
return slack;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool setup_only = false;
|
||||||
|
bool verbose_mode = false;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void init_ports();
|
||||||
|
void get_cell_delays();
|
||||||
|
void get_route_delays();
|
||||||
|
void topo_sort();
|
||||||
|
void setup_port_domains();
|
||||||
|
|
||||||
|
void reset_times();
|
||||||
|
|
||||||
|
void walk_forward();
|
||||||
|
void walk_backward();
|
||||||
|
|
||||||
|
void compute_slack();
|
||||||
|
void compute_criticality();
|
||||||
|
|
||||||
|
void print_fmax();
|
||||||
|
// get the N most failing endpoints for a given domain pair
|
||||||
|
std::vector<CellPortKey> get_failing_eps(domain_id_t domain_pair, int count);
|
||||||
|
// print the critical path for an endpoint and domain pair
|
||||||
|
void print_critical_path(CellPortKey endpoint, domain_id_t domain_pair);
|
||||||
|
|
||||||
|
const DelayPair init_delay{std::numeric_limits<delay_t>::max(), std::numeric_limits<delay_t>::lowest()};
|
||||||
|
|
||||||
|
// Set arrival/required times if more/less than the current value
|
||||||
|
void set_arrival_time(CellPortKey target, domain_id_t domain, DelayPair arrival, int path_length,
|
||||||
|
CellPortKey prev = CellPortKey());
|
||||||
|
void set_required_time(CellPortKey target, domain_id_t domain, DelayPair required, int path_length,
|
||||||
|
CellPortKey prev = CellPortKey());
|
||||||
|
|
||||||
|
// To avoid storing the domain tag structure (which could get large when considering more complex constrained tag
|
||||||
|
// cases), assign each domain an ID and use that instead
|
||||||
|
// An arrival or required time entry. Stores both the min/max delays; and the traversal to reach them for critical
|
||||||
|
// path reporting
|
||||||
|
struct ArrivReqTime
|
||||||
|
{
|
||||||
|
DelayPair value;
|
||||||
|
CellPortKey bwd_min, bwd_max;
|
||||||
|
int path_length;
|
||||||
|
};
|
||||||
|
// Data per port-domain tuple
|
||||||
|
struct PortDomainPairData
|
||||||
|
{
|
||||||
|
delay_t setup_slack = std::numeric_limits<delay_t>::max(), hold_slack = std::numeric_limits<delay_t>::max();
|
||||||
|
delay_t budget = std::numeric_limits<delay_t>::max();
|
||||||
|
int max_path_length = 0;
|
||||||
|
float criticality = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// A cell timing arc, used to cache cell timings and reduce the number of potentially-expensive Arch API calls
|
||||||
|
struct CellArc
|
||||||
|
{
|
||||||
|
|
||||||
|
enum ArcType
|
||||||
|
{
|
||||||
|
COMBINATIONAL,
|
||||||
|
SETUP,
|
||||||
|
HOLD,
|
||||||
|
CLK_TO_Q
|
||||||
|
} type;
|
||||||
|
|
||||||
|
IdString other_port;
|
||||||
|
DelayQuad value;
|
||||||
|
// Clock polarity, not used for combinational arcs
|
||||||
|
ClockEdge edge;
|
||||||
|
|
||||||
|
CellArc(ArcType type, IdString other_port, DelayQuad value)
|
||||||
|
: type(type), other_port(other_port), value(value), edge(RISING_EDGE){};
|
||||||
|
CellArc(ArcType type, IdString other_port, DelayQuad value, ClockEdge edge)
|
||||||
|
: type(type), other_port(other_port), value(value), edge(edge){};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Timing data for every cell port
|
||||||
|
struct PerPort
|
||||||
|
{
|
||||||
|
CellPortKey cell_port;
|
||||||
|
NetPortKey net_port;
|
||||||
|
PortType type;
|
||||||
|
// per domain timings
|
||||||
|
std::unordered_map<domain_id_t, ArrivReqTime> arrival;
|
||||||
|
std::unordered_map<domain_id_t, ArrivReqTime> required;
|
||||||
|
std::unordered_map<domain_id_t, PortDomainPairData> domain_pairs;
|
||||||
|
// cell timing arcs to (outputs)/from (inputs) from this port
|
||||||
|
std::vector<CellArc> cell_arcs;
|
||||||
|
// routing delay into this port (input ports only)
|
||||||
|
DelayPair route_delay;
|
||||||
|
// worst criticality and slack across domain pairs
|
||||||
|
float worst_crit;
|
||||||
|
delay_t worst_setup_slack, worst_hold_slack;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PerDomain
|
||||||
|
{
|
||||||
|
PerDomain(ClockDomainKey key) : key(key){};
|
||||||
|
ClockDomainKey key;
|
||||||
|
// these are pairs (signal port; clock port)
|
||||||
|
std::vector<std::pair<CellPortKey, IdString>> startpoints, endpoints;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PerDomainPair
|
||||||
|
{
|
||||||
|
PerDomainPair(ClockDomainPairKey key) : key(key){};
|
||||||
|
ClockDomainPairKey key;
|
||||||
|
DelayPair period;
|
||||||
|
delay_t worst_setup_slack, worst_hold_slack;
|
||||||
|
};
|
||||||
|
|
||||||
|
CellInfo *cell_info(const CellPortKey &key);
|
||||||
|
PortInfo &port_info(const CellPortKey &key);
|
||||||
|
|
||||||
|
domain_id_t domain_id(IdString cell, IdString clock_port, ClockEdge edge);
|
||||||
|
domain_id_t domain_id(const NetInfo *net, ClockEdge edge);
|
||||||
|
domain_id_t domain_pair_id(domain_id_t launch, domain_id_t capture);
|
||||||
|
|
||||||
|
void copy_domains(const CellPortKey &from, const CellPortKey &to, bool backwards);
|
||||||
|
|
||||||
|
std::unordered_map<CellPortKey, PerPort, CellPortKey::Hash> ports;
|
||||||
|
std::unordered_map<ClockDomainKey, domain_id_t, ClockDomainKey::Hash> domain_to_id;
|
||||||
|
std::unordered_map<ClockDomainPairKey, domain_id_t, ClockDomainPairKey::Hash> pair_to_id;
|
||||||
|
std::vector<PerDomain> domains;
|
||||||
|
std::vector<PerDomainPair> domain_pairs;
|
||||||
|
|
||||||
|
std::vector<CellPortKey> topological_order;
|
||||||
|
|
||||||
|
Context *ctx;
|
||||||
|
};
|
||||||
|
|
||||||
// Evenly redistribute the total path slack amongst all sinks on each path
|
// Evenly redistribute the total path slack amongst all sinks on each path
|
||||||
void assign_budget(Context *ctx, bool quiet = false);
|
void assign_budget(Context *ctx, bool quiet = false);
|
||||||
|
|
||||||
@ -32,19 +277,6 @@ void assign_budget(Context *ctx, bool quiet = false);
|
|||||||
void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false,
|
void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false,
|
||||||
bool warn_on_failure = false);
|
bool warn_on_failure = false);
|
||||||
|
|
||||||
// Data for the timing optimisation algorithm
|
|
||||||
struct NetCriticalityInfo
|
|
||||||
{
|
|
||||||
// One each per user
|
|
||||||
std::vector<delay_t> slack;
|
|
||||||
std::vector<float> criticality;
|
|
||||||
unsigned max_path_length = 0;
|
|
||||||
delay_t cd_worst_slack = std::numeric_limits<delay_t>::max();
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;
|
|
||||||
void get_criticalities(Context *ctx, NetCriticalityMap *net_crit);
|
|
||||||
|
|
||||||
NEXTPNR_NAMESPACE_END
|
NEXTPNR_NAMESPACE_END
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -79,16 +79,17 @@ NEXTPNR_NAMESPACE_BEGIN
|
|||||||
class TimingOptimiser
|
class TimingOptimiser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg){};
|
TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg), tmg(ctx){};
|
||||||
bool optimise()
|
bool optimise()
|
||||||
{
|
{
|
||||||
log_info("Running timing-driven placement optimisation...\n");
|
log_info("Running timing-driven placement optimisation...\n");
|
||||||
ctx->lock();
|
ctx->lock();
|
||||||
if (ctx->verbose)
|
if (ctx->verbose)
|
||||||
timing_analysis(ctx, false, true, false, false);
|
timing_analysis(ctx, false, true, false, false);
|
||||||
|
tmg.setup();
|
||||||
for (int i = 0; i < 30; i++) {
|
for (int i = 0; i < 30; i++) {
|
||||||
log_info(" Iteration %d...\n", i);
|
log_info(" Iteration %d...\n", i);
|
||||||
get_criticalities(ctx, &net_crit);
|
tmg.run();
|
||||||
setup_delay_limits();
|
setup_delay_limits();
|
||||||
auto crit_paths = find_crit_paths(0.98, 50000);
|
auto crit_paths = find_crit_paths(0.98, 50000);
|
||||||
for (auto &path : crit_paths)
|
for (auto &path : crit_paths)
|
||||||
@ -109,18 +110,14 @@ class TimingOptimiser
|
|||||||
for (auto usr : ni->users) {
|
for (auto usr : ni->users) {
|
||||||
max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max();
|
max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max();
|
||||||
}
|
}
|
||||||
if (!net_crit.count(net.first))
|
|
||||||
continue;
|
|
||||||
auto &nc = net_crit.at(net.first);
|
|
||||||
if (nc.slack.empty())
|
|
||||||
continue;
|
|
||||||
for (size_t i = 0; i < ni->users.size(); i++) {
|
for (size_t i = 0; i < ni->users.size(); i++) {
|
||||||
auto &usr = ni->users.at(i);
|
auto &usr = ni->users.at(i);
|
||||||
delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr);
|
delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr);
|
||||||
if (nc.max_path_length != 0) {
|
delay_t slack = tmg.get_setup_slack(CellPortKey(usr));
|
||||||
max_net_delay[std::make_pair(usr.cell->name, usr.port)] =
|
delay_t domain_slack = tmg.get_domain_setup_slack(CellPortKey(usr));
|
||||||
net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / 10);
|
if (slack == std::numeric_limits<delay_t>::max())
|
||||||
}
|
continue;
|
||||||
|
max_net_delay[std::make_pair(usr.cell->name, usr.port)] = net_delay + ((slack - domain_slack) / 10);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -283,12 +280,18 @@ class TimingOptimiser
|
|||||||
for (auto net : netnames) {
|
for (auto net : netnames) {
|
||||||
if (crit_nets.size() >= max_count)
|
if (crit_nets.size() >= max_count)
|
||||||
break;
|
break;
|
||||||
if (!net_crit.count(net))
|
float highest_crit = 0;
|
||||||
continue;
|
size_t crit_user_idx = 0;
|
||||||
auto crit_user = std::max_element(net_crit[net].criticality.begin(), net_crit[net].criticality.end());
|
NetInfo *ni = ctx->nets.at(net).get();
|
||||||
if (*crit_user > crit_thresh)
|
for (size_t i = 0; i < ni->users.size(); i++) {
|
||||||
crit_nets.push_back(
|
float crit = tmg.get_criticality(CellPortKey(ni->users.at(i)));
|
||||||
std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin()));
|
if (crit > highest_crit) {
|
||||||
|
highest_crit = crit;
|
||||||
|
crit_user_idx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (highest_crit > crit_thresh)
|
||||||
|
crit_nets.push_back(std::make_pair(ni, crit_user_idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t {
|
auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t {
|
||||||
@ -325,8 +328,6 @@ class TimingOptimiser
|
|||||||
NetInfo *pn = port.second.net;
|
NetInfo *pn = port.second.net;
|
||||||
if (pn == nullptr)
|
if (pn == nullptr)
|
||||||
continue;
|
continue;
|
||||||
if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty())
|
|
||||||
continue;
|
|
||||||
int ccount;
|
int ccount;
|
||||||
DelayQuad combDelay;
|
DelayQuad combDelay;
|
||||||
TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
|
TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
|
||||||
@ -336,7 +337,7 @@ class TimingOptimiser
|
|||||||
if (!is_path)
|
if (!is_path)
|
||||||
continue;
|
continue;
|
||||||
size_t user_idx = port_user_index(cell, port.second);
|
size_t user_idx = port_user_index(cell, port.second);
|
||||||
float usr_crit = net_crit.at(pn->name).criticality.at(user_idx);
|
float usr_crit = tmg.get_criticality(CellPortKey(cell->name, port.first));
|
||||||
if (used_ports.count(&(pn->users.at(user_idx))))
|
if (used_ports.count(&(pn->users.at(user_idx))))
|
||||||
continue;
|
continue;
|
||||||
if (usr_crit >= max_crit) {
|
if (usr_crit >= max_crit) {
|
||||||
@ -364,8 +365,7 @@ class TimingOptimiser
|
|||||||
NetInfo *pn = port.second.net;
|
NetInfo *pn = port.second.net;
|
||||||
if (pn == nullptr)
|
if (pn == nullptr)
|
||||||
continue;
|
continue;
|
||||||
if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty())
|
|
||||||
continue;
|
|
||||||
int ccount;
|
int ccount;
|
||||||
DelayQuad combDelay;
|
DelayQuad combDelay;
|
||||||
TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
|
TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
|
||||||
@ -374,12 +374,12 @@ class TimingOptimiser
|
|||||||
bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay);
|
bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay);
|
||||||
if (!is_path)
|
if (!is_path)
|
||||||
continue;
|
continue;
|
||||||
auto &crits = net_crit.at(pn->name).criticality;
|
for (size_t i = 0; i < pn->users.size(); i++) {
|
||||||
for (size_t i = 0; i < crits.size(); i++) {
|
|
||||||
if (used_ports.count(&(pn->users.at(i))))
|
if (used_ports.count(&(pn->users.at(i))))
|
||||||
continue;
|
continue;
|
||||||
if (crits.at(i) >= max_crit) {
|
float crit = tmg.get_criticality(CellPortKey(pn->users.at(i)));
|
||||||
max_crit = crits.at(i);
|
if (crit >= max_crit) {
|
||||||
|
max_crit = crit;
|
||||||
crit_sink = std::make_pair(pn, i);
|
crit_sink = std::make_pair(pn, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -420,12 +420,7 @@ class TimingOptimiser
|
|||||||
|
|
||||||
for (auto port : path) {
|
for (auto port : path) {
|
||||||
if (ctx->debug) {
|
if (ctx->debug) {
|
||||||
float crit = 0;
|
float crit = tmg.get_criticality(CellPortKey(*port));
|
||||||
NetInfo *pn = port->cell->ports.at(port->port).net;
|
|
||||||
if (net_crit.count(pn->name) && !net_crit.at(pn->name).criticality.empty())
|
|
||||||
for (size_t i = 0; i < pn->users.size(); i++)
|
|
||||||
if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port)
|
|
||||||
crit = net_crit.at(pn->name).criticality.at(i);
|
|
||||||
log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx),
|
log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx),
|
||||||
ctx->nameOfBel(port->cell->bel), crit);
|
ctx->nameOfBel(port->cell->bel), crit);
|
||||||
}
|
}
|
||||||
@ -613,10 +608,9 @@ class TimingOptimiser
|
|||||||
std::unordered_map<BelId, std::unordered_set<IdString>> bel_candidate_cells;
|
std::unordered_map<BelId, std::unordered_set<IdString>> bel_candidate_cells;
|
||||||
// Map cell ports to net delay limit
|
// Map cell ports to net delay limit
|
||||||
std::unordered_map<std::pair<IdString, IdString>, delay_t> max_net_delay;
|
std::unordered_map<std::pair<IdString, IdString>, delay_t> max_net_delay;
|
||||||
// Criticality data from timing analysis
|
|
||||||
NetCriticalityMap net_crit;
|
|
||||||
Context *ctx;
|
Context *ctx;
|
||||||
TimingOptCfg cfg;
|
TimingOptCfg cfg;
|
||||||
|
TimingAnalyser tmg;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); }
|
bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); }
|
||||||
|
@ -181,6 +181,98 @@ template <typename ForwardRange> inline auto get_only_value(ForwardRange r)
|
|||||||
return get_only_value(b, e);
|
return get_only_value(b, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From Yosys
|
||||||
|
// https://github.com/YosysHQ/yosys/blob/0fb4224ebca86156a1296b9210116d9a9cbebeed/kernel/utils.h#L131
|
||||||
|
// Topological sorter over nodes of type T ordered by comparator C.
// `database` maps every node to the set of nodes that must come before it.
// sort() fills `sorted` so that those nodes precede the node itself, and
// reports whether the graph was loop free; with `analyze_loops` set, the
// members of every loop encountered are recorded in `loops`.
template <typename T, typename C = std::less<T>> struct TopoSort
{
    bool analyze_loops, found_loops;
    std::map<T, std::set<T, C>, C> database;
    std::set<std::set<T, C>> loops;
    std::vector<T> sorted;

    TopoSort() : analyze_loops(true), found_loops(false) {}

    // Register node `n`; an already registered node keeps its dependency set.
    void node(T n)
    {
        // emplace leaves an existing entry untouched
        database.emplace(n, std::set<T, C>());
    }

    // Record that `left` must be sorted before `right`.
    void edge(T left, T right)
    {
        node(left);
        std::set<T, C> &predecessors = database[right];
        predecessors.insert(left);
    }

    // Depth-first visit of `n`: emit everything `n` depends on, then `n`.
    // `active_cells` holds the nodes on the current DFS path; `active_stack`
    // mirrors it in visiting order and is maintained only when loops are
    // analysed. `marked_cells` holds the nodes already emitted.
    void sort_worker(const T &n, std::set<T, C> &marked_cells, std::set<T, C> &active_cells,
                     std::vector<T> &active_stack)
    {
        if (active_cells.count(n)) {
            // n is already on the current path: we have closed a loop
            found_loops = true;
            if (analyze_loops) {
                // Walk the path backwards, collecting members up to and including n
                std::set<T, C> loop;
                for (auto it = active_stack.rbegin(); it != active_stack.rend(); ++it) {
                    loop.insert(*it);
                    if (*it == n)
                        break;
                }
                loops.insert(loop);
            }
            return;
        }

        if (marked_cells.count(n))
            return;

        const std::set<T, C> &predecessors = database.at(n);
        if (!predecessors.empty()) {
            if (analyze_loops)
                active_stack.push_back(n);
            active_cells.insert(n);

            for (const T &left_n : predecessors)
                sort_worker(left_n, marked_cells, active_cells, active_stack);

            if (analyze_loops)
                active_stack.pop_back();
            active_cells.erase(n);
        }

        marked_cells.insert(n);
        sorted.push_back(n);
    }

    // Run the sort over every registered node, discarding earlier results.
    // Returns true when no loop was found.
    bool sort()
    {
        loops.clear();
        sorted.clear();
        found_loops = false;

        std::set<T, C> marked_cells, active_cells;
        std::vector<T> active_stack;

        for (auto entry = database.begin(); entry != database.end(); ++entry)
            sort_worker(entry->first, marked_cells, active_cells, active_stack);

        NPNR_ASSERT(sorted.size() == database.size());
        return !found_loops;
    }
};
|
||||||
|
|
||||||
|
// Adaptor whose begin()/end() return the wrapped object's rbegin()/rend(),
// so a range-based for loop over the adaptor walks the object back to front.
// Only a reference is stored; the wrapped object must outlive the adaptor.
template <typename T> struct reversed_range_t
{
    T &obj;

    explicit reversed_range_t(T &target) : obj(target) {}

    auto begin()
    {
        return obj.rbegin();
    }

    auto end()
    {
        return obj.rend();
    }
};
|
||||||
|
|
||||||
|
// Convenience factory: deduces T from the argument and wraps obj in a
// reversed_range_t<T>, e.g. for use directly in a range-based for loop.
template <typename T> reversed_range_t<T> reversed_range(T &obj)
{
    reversed_range_t<T> wrapper(obj);
    return wrapper;
}
|
||||||
|
|
||||||
NEXTPNR_NAMESPACE_END
|
NEXTPNR_NAMESPACE_END
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -95,8 +95,8 @@ bool Arch::isBelLocationValid(BelId bel) const
|
|||||||
|
|
||||||
void Arch::permute_luts()
|
void Arch::permute_luts()
|
||||||
{
|
{
|
||||||
NetCriticalityMap nc;
|
TimingAnalyser tmg(getCtx());
|
||||||
get_criticalities(getCtx(), &nc);
|
tmg.setup();
|
||||||
|
|
||||||
std::unordered_map<PortInfo *, size_t> port_to_user;
|
std::unordered_map<PortInfo *, size_t> port_to_user;
|
||||||
for (auto net : sorted(nets)) {
|
for (auto net : sorted(nets)) {
|
||||||
@ -121,13 +121,7 @@ void Arch::permute_luts()
|
|||||||
ci->ports[port_names.at(i)].type = PORT_IN;
|
ci->ports[port_names.at(i)].type = PORT_IN;
|
||||||
}
|
}
|
||||||
auto &port = ci->ports.at(port_names.at(i));
|
auto &port = ci->ports.at(port_names.at(i));
|
||||||
float crit = 0;
|
float crit = (port.net == nullptr) ? 0 : tmg.get_criticality(CellPortKey(ci->name, port_names.at(i)));
|
||||||
if (port.net != nullptr && nc.count(port.net->name)) {
|
|
||||||
auto &n = nc.at(port.net->name);
|
|
||||||
size_t usr = port_to_user.at(&port);
|
|
||||||
if (usr < n.criticality.size())
|
|
||||||
crit = n.criticality.at(usr);
|
|
||||||
}
|
|
||||||
orig_nets.push_back(port.net);
|
orig_nets.push_back(port.net);
|
||||||
inputs.emplace_back(crit, i);
|
inputs.emplace_back(crit, i);
|
||||||
}
|
}
|
||||||
|
@ -28,9 +28,9 @@ NEXTPNR_NAMESPACE_BEGIN
|
|||||||
struct NexusPostPlaceOpt
|
struct NexusPostPlaceOpt
|
||||||
{
|
{
|
||||||
Context *ctx;
|
Context *ctx;
|
||||||
NetCriticalityMap net_crit;
|
TimingAnalyser tmg;
|
||||||
|
|
||||||
NexusPostPlaceOpt(Context *ctx) : ctx(ctx){};
|
NexusPostPlaceOpt(Context *ctx) : ctx(ctx), tmg(ctx){};
|
||||||
|
|
||||||
inline bool is_constrained(CellInfo *cell)
|
inline bool is_constrained(CellInfo *cell)
|
||||||
{
|
{
|
||||||
@ -139,7 +139,7 @@ struct NexusPostPlaceOpt
|
|||||||
|
|
||||||
void operator()()
|
void operator()()
|
||||||
{
|
{
|
||||||
get_criticalities(ctx, &net_crit);
|
tmg.setup();
|
||||||
opt_lutffs();
|
opt_lutffs();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user