Merge pull request #609 from YosysHQ/gatecat/sta-v2

Use new timing engine for criticality
This commit is contained in:
gatecat 2021-03-09 08:48:12 +00:00 committed by GitHub
commit 326b34887c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 943 additions and 254 deletions

View File

@ -552,6 +552,10 @@ struct DelayPair
{ {
return {min_delay + other.min_delay, max_delay + other.max_delay}; return {min_delay + other.min_delay, max_delay + other.max_delay};
} }
DelayPair operator-(const DelayPair &other) const
{
return {min_delay - other.min_delay, max_delay - other.max_delay};
}
}; };
// four-quadrant, min and max rise and fall delay // four-quadrant, min and max rise and fall delay
@ -575,6 +579,7 @@ struct DelayQuad
DelayPair delayPair() const { return DelayPair(minDelay(), maxDelay()); }; DelayPair delayPair() const { return DelayPair(minDelay(), maxDelay()); };
DelayQuad operator+(const DelayQuad &other) const { return {rise + other.rise, fall + other.fall}; } DelayQuad operator+(const DelayQuad &other) const { return {rise + other.rise, fall + other.fall}; }
DelayQuad operator-(const DelayQuad &other) const { return {rise - other.rise, fall - other.fall}; }
}; };
struct ClockConstraint; struct ClockConstraint;

View File

@ -78,7 +78,7 @@ class SAPlacer
public: public:
SAPlacer(Context *ctx, Placer1Cfg cfg) SAPlacer(Context *ctx, Placer1Cfg cfg)
: ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg) : ctx(ctx), fast_bels(ctx, /*check_bel_available=*/false, cfg.minBelsForGridPick), cfg(cfg), tmg(ctx)
{ {
for (auto bel : ctx->getBels()) { for (auto bel : ctx->getBels()) {
Loc loc = ctx->getBelLocation(bel); Loc loc = ctx->getBelLocation(bel);
@ -241,8 +241,9 @@ class SAPlacer
auto saplace_start = std::chrono::high_resolution_clock::now(); auto saplace_start = std::chrono::high_resolution_clock::now();
// Invoke timing analysis to obtain criticalities // Invoke timing analysis to obtain criticalities
tmg.setup_only = true;
if (!cfg.budgetBased) if (!cfg.budgetBased)
get_criticalities(ctx, &net_crit); tmg.setup();
// Calculate costs after initial placement // Calculate costs after initial placement
setup_costs(); setup_costs();
@ -379,7 +380,7 @@ class SAPlacer
// Invoke timing analysis to obtain criticalities // Invoke timing analysis to obtain criticalities
if (!cfg.budgetBased && cfg.timing_driven) if (!cfg.budgetBased && cfg.timing_driven)
get_criticalities(ctx, &net_crit); tmg.run();
// Need to rebuild costs after criticalities change // Need to rebuild costs after criticalities change
setup_costs(); setup_costs();
// Reset incremental bounds // Reset incremental bounds
@ -836,11 +837,9 @@ class SAPlacer
double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user)));
return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget) / 10)); return std::min(10.0, std::exp(delay - ctx->getDelayNS(net->users.at(user).budget) / 10));
} else { } else {
auto crit = net_crit.find(net->name); float crit = tmg.get_criticality(CellPortKey(net->users.at(user)));
if (crit == net_crit.end() || crit->second.criticality.empty())
return 0;
double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user))); double delay = ctx->getDelayNS(ctx->predictDelay(net, net->users.at(user)));
return delay * std::pow(crit->second.criticality.at(user), crit_exp); return delay * std::pow(crit, crit_exp);
} }
} }
@ -1216,9 +1215,6 @@ class SAPlacer
wirelen_t last_wirelen_cost, curr_wirelen_cost; wirelen_t last_wirelen_cost, curr_wirelen_cost;
double last_timing_cost, curr_timing_cost; double last_timing_cost, curr_timing_cost;
// Criticality data from timing analysis
NetCriticalityMap net_crit;
Context *ctx; Context *ctx;
float temp = 10; float temp = 10;
float crit_exp = 8; float crit_exp = 8;
@ -1235,6 +1231,8 @@ class SAPlacer
bool require_legal = true; bool require_legal = true;
const int legalise_dia = 4; const int legalise_dia = 4;
Placer1Cfg cfg; Placer1Cfg cfg;
TimingAnalyser tmg;
}; };
Placer1Cfg::Placer1Cfg(Context *ctx) Placer1Cfg::Placer1Cfg(Context *ctx)

View File

@ -139,9 +139,12 @@ template <typename T> struct EquationSystem
class HeAPPlacer class HeAPPlacer
{ {
public: public:
HeAPPlacer(Context *ctx, PlacerHeapCfg cfg) : ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1) HeAPPlacer(Context *ctx, PlacerHeapCfg cfg)
: ctx(ctx), cfg(cfg), fast_bels(ctx, /*check_bel_available=*/true, -1), tmg(ctx)
{ {
Eigen::initParallel(); Eigen::initParallel();
tmg.setup_only = true;
tmg.setup();
} }
bool place() bool place()
@ -269,7 +272,7 @@ class HeAPPlacer
// Update timing weights // Update timing weights
if (cfg.timing_driven) if (cfg.timing_driven)
get_criticalities(ctx, &net_crit); tmg.run();
if (legal_hpwl < best_hpwl) { if (legal_hpwl < best_hpwl) {
best_hpwl = legal_hpwl; best_hpwl = legal_hpwl;
@ -355,6 +358,8 @@ class HeAPPlacer
FastBels fast_bels; FastBels fast_bels;
std::unordered_map<IdString, std::tuple<int, int>> bel_types; std::unordered_map<IdString, std::tuple<int, int>> bel_types;
TimingAnalyser tmg;
struct BoundingBox struct BoundingBox
{ {
// Actual bounding box // Actual bounding box
@ -392,8 +397,6 @@ class HeAPPlacer
// Performance counting // Performance counting
double solve_time = 0, cl_time = 0, sl_time = 0; double solve_time = 0, cl_time = 0, sl_time = 0;
NetCriticalityMap net_crit;
// Place cells with the BEL attribute set to constrain them // Place cells with the BEL attribute set to constrain them
void place_constraints() void place_constraints()
{ {
@ -736,11 +739,9 @@ class HeAPPlacer
std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) * std::max<double>(1, (yaxis ? cfg.hpwl_scale_y : cfg.hpwl_scale_x) *
std::abs(o_pos - this_pos))); std::abs(o_pos - this_pos)));
if (user_idx != -1 && net_crit.count(ni->name)) { if (user_idx != -1) {
auto &nc = net_crit.at(ni->name); weight *= (1.0 + cfg.timingWeight * std::pow(tmg.get_criticality(CellPortKey(port)),
if (user_idx < int(nc.criticality.size())) cfg.criticalityExponent));
weight *= (1.0 + cfg.timingWeight *
std::pow(nc.criticality.at(user_idx), cfg.criticalityExponent));
} }
// If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation, // If cell 0 is not fixed, it will stamp +w on its equation and -w on the other end's equation,

View File

@ -112,16 +112,14 @@ struct Router2
Context *ctx; Context *ctx;
Router2Cfg cfg; Router2Cfg cfg;
Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg) {} Router2(Context *ctx, const Router2Cfg &cfg) : ctx(ctx), cfg(cfg), tmg(ctx) { tmg.setup(); }
// Use 'udata' for fast net lookups and indexing // Use 'udata' for fast net lookups and indexing
std::vector<NetInfo *> nets_by_udata; std::vector<NetInfo *> nets_by_udata;
std::vector<PerNetData> nets; std::vector<PerNetData> nets;
bool timing_driven; bool timing_driven;
TimingAnalyser tmg;
// Criticality data from timing analysis
NetCriticalityMap net_crit;
void setup_nets() void setup_nets()
{ {
@ -1175,18 +1173,13 @@ struct Router2
if (timing_driven && (int(route_queue.size()) > (int(nets_by_udata.size()) / 50))) { if (timing_driven && (int(route_queue.size()) > (int(nets_by_udata.size()) / 50))) {
// Heuristic: reduce runtime by skipping STA in the case of a "long tail" of a few // Heuristic: reduce runtime by skipping STA in the case of a "long tail" of a few
// congested nodes // congested nodes
get_criticalities(ctx, &net_crit); tmg.run();
for (auto n : route_queue) { for (auto n : route_queue) {
IdString name = nets_by_udata.at(n)->name; NetInfo *ni = nets_by_udata.at(n);
auto fnd = net_crit.find(name);
auto &net = nets.at(n); auto &net = nets.at(n);
net.max_crit = 0; net.max_crit = 0;
if (fnd == net_crit.end()) for (auto &usr : ni->users) {
continue; float c = tmg.get_criticality(CellPortKey(usr));
for (int i = 0; i < int(fnd->second.criticality.size()); i++) {
float c = fnd->second.criticality.at(i);
for (auto &a : net.arcs.at(i))
a.arc_crit = c;
net.max_crit = std::max(net.max_crit, c); net.max_crit = std::max(net.max_crit, c);
} }
} }

View File

@ -30,6 +30,547 @@
NEXTPNR_NAMESPACE_BEGIN NEXTPNR_NAMESPACE_BEGIN
void TimingAnalyser::setup()
{
init_ports();
get_cell_delays();
topo_sort();
setup_port_domains();
run();
}
void TimingAnalyser::run()
{
reset_times();
get_route_delays();
walk_forward();
walk_backward();
compute_slack();
compute_criticality();
}
void TimingAnalyser::init_ports()
{
// Per cell port structures
for (auto cell : sorted(ctx->cells)) {
CellInfo *ci = cell.second;
for (auto port : sorted_ref(ci->ports)) {
auto &data = ports[CellPortKey(ci->name, port.first)];
data.type = port.second.type;
data.cell_port = CellPortKey(ci->name, port.first);
}
}
// Cell port to net port mapping
for (auto net : sorted(ctx->nets)) {
NetInfo *ni = net.second;
if (ni->driver.cell != nullptr)
ports[CellPortKey(ni->driver)].net_port = NetPortKey(ni->name);
for (size_t i = 0; i < ni->users.size(); i++)
ports[CellPortKey(ni->users.at(i))].net_port = NetPortKey(ni->name, i);
}
}
// Rebuild the list of cell timing arcs (`cell_arcs`) for every port that is attached to a net.
//
// Depending on the timing class reported by the architecture, a port receives:
//  - SETUP and HOLD arcs (register inputs) or CLK_TO_Q arcs (register outputs), one per clocking
//    info entry whose clock port exists and is connected; and
//  - COMBINATIONAL arcs to/from every connected port of the opposite direction for which
//    getCellDelay() reports a path.
// Ports classed as startpoint, endpoint, clock input, generated clock or ignore end up with no arcs.
void TimingAnalyser::get_cell_delays()
{
for (auto &port : ports) {
CellInfo *ci = cell_info(port.first);
auto &pi = port_info(port.first);
auto &pd = port.second;
IdString name = port.first.port;
// Ignore dangling ports altogether for timing purposes
if (pd.net_port.net == IdString())
continue;
// Drop any arcs left from a previous call before rebuilding them
pd.cell_arcs.clear();
int clkInfoCount = 0;
TimingPortClass cls = ctx->getPortTimingClass(ci, name, clkInfoCount);
// These classes get no cell arcs at all
if (cls == TMG_STARTPOINT || cls == TMG_ENDPOINT || cls == TMG_CLOCK_INPUT || cls == TMG_GEN_CLOCK ||
cls == TMG_IGNORE)
continue;
if (pi.type == PORT_IN) {
// Input ports might have setup/hold relationships
if (cls == TMG_REGISTER_INPUT) {
for (int i = 0; i < clkInfoCount; i++) {
auto info = ctx->getPortClockingInfo(ci, name, i);
// Skip clocking info whose clock port is missing or unconnected
if (!ci->ports.count(info.clock_port) || ci->ports.at(info.clock_port).net == nullptr)
continue;
// The same setup (resp. hold) value is passed for both DelayQuad arguments
pd.cell_arcs.emplace_back(CellArc::SETUP, info.clock_port, DelayQuad(info.setup, info.setup),
info.edge);
pd.cell_arcs.emplace_back(CellArc::HOLD, info.clock_port, DelayQuad(info.hold, info.hold),
info.edge);
}
}
// Combinational delays through cell
for (auto &other_port : ci->ports) {
auto &op = other_port.second;
// ignore dangling ports and non-outputs
if (op.net == nullptr || op.type != PORT_OUT)
continue;
DelayQuad delay;
// Query the delay from this input to the candidate output
bool is_path = ctx->getCellDelay(ci, name, other_port.first, delay);
if (is_path)
pd.cell_arcs.emplace_back(CellArc::COMBINATIONAL, other_port.first, delay);
}
} else if (pi.type == PORT_OUT) {
// Output ports might have clk-to-q relationships
if (cls == TMG_REGISTER_OUTPUT) {
for (int i = 0; i < clkInfoCount; i++) {
auto info = ctx->getPortClockingInfo(ci, name, i);
// Skip clocking info whose clock port is missing or unconnected
if (!ci->ports.count(info.clock_port) || ci->ports.at(info.clock_port).net == nullptr)
continue;
pd.cell_arcs.emplace_back(CellArc::CLK_TO_Q, info.clock_port, info.clockToQ, info.edge);
}
}
// Combinational delays through cell
for (auto &other_port : ci->ports) {
auto &op = other_port.second;
// ignore dangling ports and non-inputs
if (op.net == nullptr || op.type != PORT_IN)
continue;
DelayQuad delay;
// Query the delay from the candidate input to this output (arguments reversed w.r.t. the input case)
bool is_path = ctx->getCellDelay(ci, other_port.first, name, delay);
if (is_path)
pd.cell_arcs.emplace_back(CellArc::COMBINATIONAL, other_port.first, delay);
}
}
}
}
// Refresh the routing delay stored on every net user port. Nets without a driver, and any
// driver/user cell that has no bel assigned, are skipped and keep their previous route_delay.
void TimingAnalyser::get_route_delays()
{
    for (auto net : sorted(ctx->nets)) {
        NetInfo *ni = net.second;
        const bool driver_placed = (ni->driver.cell != nullptr) && !(ni->driver.cell->bel == BelId());
        if (!driver_placed)
            continue;
        for (auto &usr : ni->users) {
            const bool user_placed = !(usr.cell->bel == BelId());
            if (user_placed)
                ports.at(CellPortKey(usr)).route_delay = DelayPair(ctx->getNetinfoRouteDelay(ni, usr));
        }
    }
}
void TimingAnalyser::topo_sort()
{
TopoSort<CellPortKey> topo;
for (auto &port : ports) {
auto &pd = port.second;
// All ports are nodes
topo.node(port.first);
if (pd.type == PORT_IN) {
// inputs: combinational arcs through the cell are edges
for (auto &arc : pd.cell_arcs) {
if (arc.type != CellArc::COMBINATIONAL)
continue;
topo.edge(port.first, CellPortKey(port.first.cell, arc.other_port));
}
} else if (pd.type == PORT_OUT) {
// output: routing arcs are edges
const NetInfo *pn = port_info(port.first).net;
if (pn != nullptr) {
for (auto &usr : pn->users)
topo.edge(port.first, CellPortKey(usr));
}
}
}
bool no_loops = topo.sort();
if (!no_loops && verbose_mode) {
log_info("Found %d combinational loops:\n", int(topo.loops.size()));
int i = 0;
for (auto &loop : topo.loops) {
log_info(" loop %d:\n", ++i);
for (auto &port : loop) {
log_info(" %s.%s (%s)\n", ctx->nameOf(port.cell), ctx->nameOf(port.port),
ctx->nameOf(port_info(port).net));
}
}
}
std::swap(topological_order, topo.sorted);
}
// Work out which clock domains are relevant at each port.
//
// A forward sweep creates `arrival` entries: CLK_TO_Q arcs make an output a startpoint of its clock
// domain, and the set of domains is then copied along nets and combinational arcs. A backward sweep
// does the same for `required` entries, starting from SETUP arcs (endpoints). Finally every
// (arrival domain, required domain) combination present at a port is registered as a domain pair.
void TimingAnalyser::setup_port_domains()
{
// Forget start/endpoints recorded by a previous call
for (auto &d : domains) {
d.startpoints.clear();
d.endpoints.clear();
}
// Go forward through the topological order (domains from the PoV of arrival time)
for (auto port : topological_order) {
auto &pd = ports.at(port);
auto &pi = port_info(port);
if (pi.type == PORT_OUT) {
for (auto &fanin : pd.cell_arcs) {
if (fanin.type != CellArc::CLK_TO_Q)
continue;
// registered outputs are startpoints
auto dom = domain_id(port.cell, fanin.other_port, fanin.edge);
// create per-domain data
pd.arrival[dom];
domains.at(dom).startpoints.emplace_back(port, fanin.other_port);
}
// copy domains across routing
if (pi.net != nullptr)
for (auto &usr : pi.net->users)
copy_domains(port, CellPortKey(usr), false);
} else {
// copy domains from input to output
for (auto &fanout : pd.cell_arcs) {
if (fanout.type != CellArc::COMBINATIONAL)
continue;
copy_domains(port, CellPortKey(port.cell, fanout.other_port), false);
}
}
}
// Go backward through the topological order (domains from the PoV of required time)
for (auto port : reversed_range(topological_order)) {
auto &pd = ports.at(port);
auto &pi = port_info(port);
if (pi.type == PORT_OUT) {
// copy domains from output to input
for (auto &fanin : pd.cell_arcs) {
if (fanin.type != CellArc::COMBINATIONAL)
continue;
copy_domains(port, CellPortKey(port.cell, fanin.other_port), true);
}
} else {
for (auto &fanout : pd.cell_arcs) {
if (fanout.type != CellArc::SETUP)
continue;
// registered inputs are endpoints
auto dom = domain_id(port.cell, fanout.other_port, fanout.edge);
// create per-domain data
pd.required[dom];
domains.at(dom).endpoints.emplace_back(port, fanout.other_port);
}
// copy port to driver
if (pi.net != nullptr && pi.net->driver.cell != nullptr)
copy_domains(port, CellPortKey(pi.net->driver), true);
}
}
// Iterate over ports and find domain pairs
for (auto port : topological_order) {
auto &pd = ports.at(port);
for (auto &arr : pd.arrival)
for (auto &req : pd.required) {
// operator[] creates the per-port entry; domain_pair_id() allocates the pair on first use
pd.domain_pairs[domain_pair_id(arr.first, req.first)];
}
}
}
void TimingAnalyser::reset_times()
{
for (auto &port : ports) {
auto do_reset = [&](std::unordered_map<domain_id_t, ArrivReqTime> &times) {
for (auto &t : times) {
t.second.value = init_delay;
t.second.path_length = 0;
t.second.bwd_min = CellPortKey();
t.second.bwd_max = CellPortKey();
}
};
do_reset(port.second.arrival);
do_reset(port.second.required);
for (auto &dp : port.second.domain_pairs) {
dp.second.setup_slack = std::numeric_limits<delay_t>::max();
dp.second.hold_slack = std::numeric_limits<delay_t>::max();
dp.second.max_path_length = 0;
dp.second.criticality = 0;
dp.second.budget = 0;
}
port.second.worst_crit = 0;
port.second.worst_setup_slack = std::numeric_limits<delay_t>::max();
port.second.worst_hold_slack = std::numeric_limits<delay_t>::max();
}
}
// Merge a candidate arrival time into `target` for `domain`: the stored maximum only ever grows
// and, unless `setup_only` is set, the stored minimum only ever shrinks. `prev` is remembered as
// the predecessor for whichever bound was updated; the longest path length seen is kept.
void TimingAnalyser::set_arrival_time(CellPortKey target, domain_id_t domain, DelayPair arrival, int path_length,
                                      CellPortKey prev)
{
    auto &slot = ports.at(target).arrival.at(domain);

    const bool later = slot.value.max_delay < arrival.max_delay;
    if (later) {
        slot.value.max_delay = arrival.max_delay;
        slot.bwd_max = prev;
    }

    // The minimum is only needed for hold analysis
    if (!setup_only) {
        if (arrival.min_delay < slot.value.min_delay) {
            slot.value.min_delay = arrival.min_delay;
            slot.bwd_min = prev;
        }
    }

    if (slot.path_length < path_length)
        slot.path_length = path_length;
}
// Merge a candidate required time into `target` for `domain`: the stored minimum only ever
// shrinks and, unless `setup_only` is set, the stored maximum only ever grows. `prev` is
// remembered as the neighbour for whichever bound was updated; the longest path length is kept.
void TimingAnalyser::set_required_time(CellPortKey target, domain_id_t domain, DelayPair required, int path_length,
                                       CellPortKey prev)
{
    auto &slot = ports.at(target).required.at(domain);

    const bool earlier = required.min_delay < slot.value.min_delay;
    if (earlier) {
        slot.value.min_delay = required.min_delay;
        slot.bwd_min = prev;
    }

    // The maximum is only needed for hold analysis
    if (!setup_only) {
        if (slot.value.max_delay < required.max_delay) {
            slot.value.max_delay = required.max_delay;
            slot.bwd_max = prev;
        }
    }

    if (slot.path_length < path_length)
        slot.path_length = path_length;
}
// Forward pass of the analysis: seed arrival times at the startpoints of every domain, then
// propagate them in topological order, adding the routing delay when crossing a net and the
// combinational delay when crossing a cell.
void TimingAnalyser::walk_forward()
{
// Assign initial arrival time to domain startpoints
for (domain_id_t dom_id = 0; dom_id < domain_id_t(domains.size()); ++dom_id) {
auto &dom = domains.at(dom_id);
for (auto &sp : dom.startpoints) {
// sp.first is the startpoint port, sp.second the name of its clock port
auto &pd = ports.at(sp.first);
DelayPair init_arrival(0);
CellPortKey clock_key;
// TODO: clock routing delay, if analysis of that is enabled
if (sp.second != IdString()) {
// clocked startpoints have a clock-to-out time
for (auto &fanin : pd.cell_arcs) {
if (fanin.type == CellArc::CLK_TO_Q && fanin.other_port == sp.second) {
init_arrival = init_arrival + fanin.value.delayPair();
// only the first matching clock-to-Q arc is used
break;
}
}
clock_key = CellPortKey(sp.first.cell, sp.second);
}
// The clock port (or an empty key for unclocked startpoints) is recorded as the predecessor
set_arrival_time(sp.first, dom_id, init_arrival, 1, clock_key);
}
}
// Walk forward in topological order
for (auto p : topological_order) {
auto &pd = ports.at(p);
for (auto &arr : pd.arrival) {
if (pd.type == PORT_OUT) {
// Output port: propagate delay through net, adding route delay
NetInfo *net = port_info(p).net;
if (net != nullptr)
for (auto &usr : net->users) {
CellPortKey usr_key(usr);
auto &usr_pd = ports.at(usr_key);
// Crossing a net leaves the path length unchanged
set_arrival_time(usr_key, arr.first, arr.second.value + usr_pd.route_delay,
arr.second.path_length, p);
}
} else if (pd.type == PORT_IN) {
// Input port; propagate delay through cell, adding combinational delay
for (auto &fanout : pd.cell_arcs) {
if (fanout.type != CellArc::COMBINATIONAL)
continue;
// Crossing a cell increments the path length
set_arrival_time(CellPortKey(p.cell, fanout.other_port), arr.first,
arr.second.value + fanout.value.delayPair(), arr.second.path_length + 1, p);
}
}
}
}
}
// Backward pass of the analysis: seed required times at the endpoints of every domain, then
// propagate them in reverse topological order, subtracting the routing delay when crossing a net
// and the combinational delay when crossing a cell.
void TimingAnalyser::walk_backward()
{
// Assign initial required time to domain endpoints
// Note that clock frequency will be considered later in the analysis; for now all required times are normalised
// to 0ns
for (domain_id_t dom_id = 0; dom_id < domain_id_t(domains.size()); ++dom_id) {
auto &dom = domains.at(dom_id);
for (auto &ep : dom.endpoints) {
// ep.first is the endpoint port, ep.second the name of its clock port
auto &pd = ports.at(ep.first);
DelayPair init_setuphold(0);
CellPortKey clock_key;
// TODO: clock routing delay, if analysis of that is enabled
if (ep.second != IdString()) {
// Add setup/hold time, if this endpoint is clocked
for (auto &fanin : pd.cell_arcs) {
// Setup lowers the minimum bound and hold lowers the maximum bound; both use the arc's maxDelay()
if (fanin.type == CellArc::SETUP && fanin.other_port == ep.second)
init_setuphold.min_delay -= fanin.value.maxDelay();
if (fanin.type == CellArc::HOLD && fanin.other_port == ep.second)
init_setuphold.max_delay -= fanin.value.maxDelay();
}
clock_key = CellPortKey(ep.first.cell, ep.second);
}
// The clock port (or an empty key for unclocked endpoints) is recorded as the neighbour
set_required_time(ep.first, dom_id, init_setuphold, 1, clock_key);
}
}
// Walk backwards in topological order
for (auto p : reversed_range(topological_order)) {
auto &pd = ports.at(p);
for (auto &req : pd.required) {
if (pd.type == PORT_IN) {
// Input port: propagate delay back through net, subtracting route delay
NetInfo *net = port_info(p).net;
if (net != nullptr && net->driver.cell != nullptr)
// Crossing a net leaves the path length unchanged
set_required_time(CellPortKey(net->driver), req.first, req.second.value - pd.route_delay,
req.second.path_length, p);
} else if (pd.type == PORT_OUT) {
// Output port : propagate delay back through cell, subtracting combinational delay
for (auto &fanin : pd.cell_arcs) {
if (fanin.type != CellArc::COMBINATIONAL)
continue;
// Crossing a cell increments the path length
set_required_time(CellPortKey(p.cell, fanin.other_port), req.first,
req.second.value - fanin.value.delayPair(), req.second.path_length + 1, p);
}
}
}
}
}
// Temporary testing code for comparison only.
// Logs, for each domain, the lowest Fmax implied by any port that carries both an arrival and a
// required time in that same domain.
void TimingAnalyser::print_fmax()
{
    std::unordered_map<int, double> worst_fmax;
    for (const auto &key : topological_order) {
        auto &pd = ports.at(key);
        for (auto &req : pd.required) {
            auto arr_it = pd.arrival.find(req.first);
            if (arr_it == pd.arrival.end())
                continue;
            const double fmax =
                    1000.0 / ctx->getDelayNS(arr_it->second.value.maxDelay() - req.second.value.minDelay());
            // Insert on first sight of the domain, otherwise keep the smaller figure
            auto ins = worst_fmax.emplace(req.first, fmax);
            if (!ins.second && ins.first->second > fmax)
                ins.first->second = fmax;
        }
    }
    for (auto &entry : worst_fmax)
        log_info("Domain %s Worst Fmax %.02f\n", ctx->nameOf(domains.at(entry.first).key.clock), entry.second);
}
void TimingAnalyser::compute_slack()
{
for (auto &dp : domain_pairs) {
dp.worst_setup_slack = std::numeric_limits<delay_t>::max();
dp.worst_hold_slack = std::numeric_limits<delay_t>::max();
}
for (auto p : topological_order) {
auto &pd = ports.at(p);
for (auto &pdp : pd.domain_pairs) {
auto &dp = domain_pairs.at(pdp.first);
auto &arr = pd.arrival.at(dp.key.launch);
auto &req = pd.required.at(dp.key.capture);
pdp.second.setup_slack = dp.period.minDelay() - (arr.value.maxDelay() - req.value.minDelay());
if (!setup_only)
pdp.second.hold_slack = arr.value.minDelay() - req.value.maxDelay();
pdp.second.max_path_length = arr.path_length + req.path_length;
pd.worst_setup_slack = std::min(pd.worst_setup_slack, pdp.second.setup_slack);
dp.worst_setup_slack = std::min(dp.worst_setup_slack, pdp.second.setup_slack);
if (!setup_only) {
pd.worst_hold_slack = std::min(pd.worst_hold_slack, pdp.second.hold_slack);
dp.worst_hold_slack = std::min(dp.worst_hold_slack, pdp.second.hold_slack);
}
}
}
}
// Derive a criticality in [0, 1] for every (port, domain pair) from the setup slacks produced by
// compute_slack(), and record the largest value seen at each port in `worst_crit`.
//
// The value is 1 - (slack - worst_slack) / (-worst_slack), clamped to [0, 1], where worst_slack is
// the worst setup slack of the domain pair. A port at the worst slack therefore gets 1.
void TimingAnalyser::compute_criticality()
{
    for (auto p : topological_order) {
        auto &pd = ports.at(p);
        for (auto &pdp : pd.domain_pairs) {
            auto &dp = domain_pairs.at(pdp.first);
            const float rel_slack = float(pdp.second.setup_slack) - float(dp.worst_setup_slack);
            const float norm = float(-dp.worst_setup_slack);
            float crit;
            if (rel_slack == 0.0f && norm == 0.0f) {
                // 0/0 would yield NaN, which passes through the min/max clamp below unchanged and
                // then contaminates every cost computed from the criticality. A port sitting
                // exactly at the worst slack is fully critical, as in the non-degenerate case.
                crit = 1.0f;
            } else {
                crit = 1.0f - rel_slack / norm;
            }
            crit = std::min(crit, 1.0f);
            crit = std::max(crit, 0.0f);
            pdp.second.criticality = crit;
            pd.worst_crit = std::max(pd.worst_crit, crit);
        }
    }
}
// Return up to `count` endpoints of the capture domain of `domain_pair`, ordered by increasing
// setup slack. Each round picks the endpoint with the smallest slack strictly greater than the
// previous pick, so at most one endpoint is returned per distinct slack value.
std::vector<CellPortKey> TimingAnalyser::get_failing_eps(domain_id_t domain_pair, int count)
{
    std::vector<CellPortKey> result;
    delay_t floor_slack = std::numeric_limits<delay_t>::min();
    auto &dp = domain_pairs.at(domain_pair);
    auto &capture = domains.at(dp.key.capture);

    while (int(result.size()) < count) {
        CellPortKey best;
        delay_t best_slack = std::numeric_limits<delay_t>::max();
        for (const auto &ep : capture.endpoints) {
            auto &pd = ports.at(ep.first);
            auto pair_it = pd.domain_pairs.find(domain_pair);
            if (pair_it == pd.domain_pairs.end())
                continue;
            const delay_t ep_slack = pair_it->second.setup_slack;
            if (ep_slack > floor_slack && ep_slack < best_slack) {
                best = ep.first;
                best_slack = ep_slack;
            }
        }
        // An empty key means no further candidate exists
        if (best == CellPortKey())
            break;
        result.push_back(best);
        floor_slack = best_slack;
    }
    return result;
}
// Log the path leading to `endpoint` for `domain_pair`: starting at the endpoint, follow the
// max-arrival back-pointers of the launch domain until an empty key, or a port without arrival
// data for that domain, is reached.
void TimingAnalyser::print_critical_path(CellPortKey endpoint, domain_id_t domain_pair)
{
    auto &dp = domain_pairs.at(domain_pair);
    CellPortKey cursor = endpoint;
    log(" endpoint %s.%s (slack %.02fns):\n", ctx->nameOf(cursor.cell), ctx->nameOf(cursor.port),
        ctx->getDelayNS(ports.at(cursor).domain_pairs.at(domain_pair).setup_slack));

    while (cursor != CellPortKey()) {
        log(" %s.%s (net %s)\n", ctx->nameOf(cursor.cell), ctx->nameOf(cursor.port),
            ctx->nameOf(get_net_or_empty(ctx->cells.at(cursor.cell).get(), cursor.port)));
        auto &arrivals = ports.at(cursor).arrival;
        auto step = arrivals.find(dp.key.launch);
        if (step == arrivals.end())
            break;
        cursor = step->second.bwd_max;
    }
}
namespace {
// Name of a clock edge for reports; anything other than FALLING_EDGE is reported as "posedge".
const char *edge_name(ClockEdge edge)
{
    if (edge == FALLING_EDGE)
        return "negedge";
    return "posedge";
}
} // namespace
void TimingAnalyser::print_report()
{
for (int i = 0; i < int(domain_pairs.size()); i++) {
auto &dp = domain_pairs.at(i);
auto &launch = domains.at(dp.key.launch);
auto &capture = domains.at(dp.key.capture);
log("Worst endpoints for %s %s -> %s %s\n", edge_name(launch.key.edge), ctx->nameOf(launch.key.clock),
edge_name(capture.key.edge), ctx->nameOf(capture.key.clock));
auto failing_eps = get_failing_eps(i, 5);
for (auto &ep : failing_eps)
print_critical_path(ep, i);
log_break();
}
}
// Look up (creating if necessary) the domain for the net attached to `clock_port` of `cell`.
domain_id_t TimingAnalyser::domain_id(IdString cell, IdString clock_port, ClockEdge edge)
{
    const NetInfo *clock_net = ctx->cells.at(cell)->ports.at(clock_port).net;
    return domain_id(clock_net, edge);
}
// Return the id of the domain keyed by (clock net name, edge), appending a new entry to `domains`
// the first time a key is seen. `net` must not be null.
domain_id_t TimingAnalyser::domain_id(const NetInfo *net, ClockEdge edge)
{
    NPNR_ASSERT(net != nullptr);
    ClockDomainKey key{net->name, edge};
    // The candidate id is the index the new domain would occupy
    auto lookup = domain_to_id.emplace(key, domains.size());
    const bool is_new = lookup.second;
    if (is_new)
        domains.emplace_back(key);
    return lookup.first->second;
}
// Return the id of the (launch, capture) domain pair, appending a new entry to `domain_pairs`
// the first time a combination is seen.
domain_id_t TimingAnalyser::domain_pair_id(domain_id_t launch, domain_id_t capture)
{
    ClockDomainPairKey key{launch, capture};
    // The candidate id is the index the new pair would occupy
    auto lookup = pair_to_id.emplace(key, domain_pairs.size());
    const bool is_new = lookup.second;
    if (is_new)
        domain_pairs.emplace_back(key);
    return lookup.first->second;
}
// Ensure that every domain present at `from` also has an entry at `to`. With `backward` set the
// `required` maps are used, otherwise the `arrival` maps. Only keys are created; values of
// existing entries are left untouched.
void TimingAnalyser::copy_domains(const CellPortKey &from, const CellPortKey &to, bool backward)
{
    auto &src_port = ports.at(from);
    auto &dst_port = ports.at(to);
    auto &src_map = backward ? src_port.required : src_port.arrival;
    auto &dst_map = backward ? dst_port.required : dst_port.arrival;
    for (auto &dom : src_map) {
        // operator[] default-constructs the entry if it is missing
        dst_map[dom.first];
    }
}
// Resolve the cell named by `key`; throws via at() if no such cell exists.
CellInfo *TimingAnalyser::cell_info(const CellPortKey &key)
{
    auto &owner = ctx->cells.at(key.cell);
    return owner.get();
}
// Resolve the port named by `key`; throws via at() if the cell or the port does not exist.
PortInfo &TimingAnalyser::port_info(const CellPortKey &key)
{
    auto &owner = ctx->cells.at(key.cell);
    return owner->ports.at(key.port);
}
/** LEGACY CODE BEGIN **/
namespace { namespace {
struct ClockEvent struct ClockEvent
{ {
@ -86,7 +627,6 @@ struct CriticalPath
}; };
typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap; typedef std::unordered_map<ClockPair, CriticalPath> CriticalPathMap;
typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;
struct Timing struct Timing
{ {
@ -96,7 +636,6 @@ struct Timing
delay_t min_slack; delay_t min_slack;
CriticalPathMap *crit_path; CriticalPathMap *crit_path;
DelayFrequency *slack_histogram; DelayFrequency *slack_histogram;
NetCriticalityMap *net_crit;
IdString async_clock; IdString async_clock;
struct TimingData struct TimingData
@ -112,10 +651,9 @@ struct Timing
}; };
Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr, Timing(Context *ctx, bool net_delays, bool update, CriticalPathMap *crit_path = nullptr,
DelayFrequency *slack_histogram = nullptr, NetCriticalityMap *net_crit = nullptr) DelayFrequency *slack_histogram = nullptr)
: ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->setting<float>("target_freq")), : ctx(ctx), net_delays(net_delays), update(update), min_slack(1.0e12 / ctx->setting<float>("target_freq")),
crit_path(crit_path), slack_histogram(slack_histogram), net_crit(net_crit), crit_path(crit_path), slack_histogram(slack_histogram), async_clock(ctx->id("$async$"))
async_clock(ctx->id("$async$"))
{ {
} }
@ -496,156 +1034,6 @@ struct Timing
std::reverse(cp_ports.begin(), cp_ports.end()); std::reverse(cp_ports.begin(), cp_ports.end());
} }
} }
if (net_crit) {
NPNR_ASSERT(crit_path);
// Go through in reverse topological order to set required times
for (auto net : boost::adaptors::reverse(topological_order)) {
if (!net_data.count(net))
continue;
auto &nd_map = net_data.at(net);
for (auto &startdomain : nd_map) {
auto &nd = startdomain.second;
if (nd.false_startpoint)
continue;
if (startdomain.first.clock == async_clock)
continue;
if (nd.min_required.empty())
nd.min_required.resize(net->users.size(), std::numeric_limits<delay_t>::max());
delay_t net_min_required = std::numeric_limits<delay_t>::max();
for (size_t i = 0; i < net->users.size(); i++) {
auto &usr = net->users.at(i);
auto net_delay = ctx->getNetinfoRouteDelay(net, usr);
int port_clocks;
TimingPortClass portClass = ctx->getPortTimingClass(usr.cell, usr.port, port_clocks);
if (portClass == TMG_REGISTER_INPUT || portClass == TMG_ENDPOINT) {
auto process_endpoint = [&](IdString clksig, ClockEdge edge, delay_t setup) {
delay_t period;
// Set default period
if (edge == startdomain.first.edge) {
period = clk_period;
} else {
period = clk_period / 2;
}
if (clksig != async_clock) {
if (ctx->nets.at(clksig)->clkconstr) {
if (edge == startdomain.first.edge) {
// same edge
period = ctx->nets.at(clksig)->clkconstr->period.minDelay();
} else if (edge == RISING_EDGE) {
// falling -> rising
period = ctx->nets.at(clksig)->clkconstr->low.minDelay();
} else if (edge == FALLING_EDGE) {
// rising -> falling
period = ctx->nets.at(clksig)->clkconstr->high.minDelay();
}
}
}
nd.min_required.at(i) = std::min(period - setup, nd.min_required.at(i));
};
if (portClass == TMG_REGISTER_INPUT) {
for (int j = 0; j < port_clocks; j++) {
TimingClockingInfo clkInfo = ctx->getPortClockingInfo(usr.cell, usr.port, j);
const NetInfo *clknet = get_net_or_empty(usr.cell, clkInfo.clock_port);
IdString clksig = clknet ? clknet->name : async_clock;
process_endpoint(clksig, clknet ? clkInfo.edge : RISING_EDGE,
clkInfo.setup.maxDelay());
}
} else {
process_endpoint(async_clock, RISING_EDGE, 0);
}
}
net_min_required = std::min(net_min_required, nd.min_required.at(i) - net_delay);
}
PortRef &drv = net->driver;
if (drv.cell == nullptr)
continue;
for (const auto &port : drv.cell->ports) {
if (port.second.type != PORT_IN || !port.second.net)
continue;
DelayQuad comb_delay;
bool is_path = ctx->getCellDelay(drv.cell, port.first, drv.port, comb_delay);
if (!is_path)
continue;
int cc;
auto pclass = ctx->getPortTimingClass(drv.cell, port.first, cc);
if (pclass != TMG_COMB_INPUT)
continue;
NetInfo *sink_net = port.second.net;
if (net_data.count(sink_net) && net_data.at(sink_net).count(startdomain.first)) {
auto &sink_nd = net_data.at(sink_net).at(startdomain.first);
if (sink_nd.min_required.empty())
sink_nd.min_required.resize(sink_net->users.size(),
std::numeric_limits<delay_t>::max());
for (size_t i = 0; i < sink_net->users.size(); i++) {
auto &user = sink_net->users.at(i);
if (user.cell == drv.cell && user.port == port.first) {
sink_nd.min_required.at(i) = std::min(sink_nd.min_required.at(i),
net_min_required - comb_delay.maxDelay());
break;
}
}
}
}
}
}
std::unordered_map<ClockEvent, delay_t> worst_slack;
// Assign slack values
for (auto &net_entry : net_data) {
const NetInfo *net = net_entry.first;
for (auto &startdomain : net_entry.second) {
auto &nd = startdomain.second;
if (startdomain.first.clock == async_clock)
continue;
if (nd.min_required.empty())
continue;
auto &nc = (*net_crit)[net->name];
if (nc.slack.empty())
nc.slack.resize(net->users.size(), std::numeric_limits<delay_t>::max());
for (size_t i = 0; i < net->users.size(); i++) {
delay_t slack = nd.min_required.at(i) -
(nd.max_arrival + ctx->getNetinfoRouteDelay(net, net->users.at(i)));
if (worst_slack.count(startdomain.first))
worst_slack.at(startdomain.first) = std::min(worst_slack.at(startdomain.first), slack);
else
worst_slack[startdomain.first] = slack;
nc.slack.at(i) = slack;
}
if (ctx->debug)
log_break();
}
}
// Assign criticality values
for (auto &net_entry : net_data) {
const NetInfo *net = net_entry.first;
for (auto &startdomain : net_entry.second) {
if (startdomain.first.clock == async_clock)
continue;
auto &nd = startdomain.second;
if (nd.min_required.empty())
continue;
auto &nc = (*net_crit)[net->name];
if (nc.slack.empty())
continue;
if (nc.criticality.empty())
nc.criticality.resize(net->users.size(), 0);
// Only consider intra-clock paths for criticality
if (!crit_path->count(ClockPair{startdomain.first, startdomain.first}))
continue;
delay_t dmax = crit_path->at(ClockPair{startdomain.first, startdomain.first}).path_delay;
for (size_t i = 0; i < net->users.size(); i++) {
float criticality =
1.0f - ((float(nc.slack.at(i)) - float(worst_slack.at(startdomain.first))) / dmax);
nc.criticality.at(i) = std::min<double>(1.0, std::max<double>(0.0, criticality));
}
nc.max_path_length = nd.max_path_length;
nc.cd_worst_slack = worst_slack.at(startdomain.first);
}
}
}
return min_slack; return min_slack;
} }
@ -999,12 +1387,4 @@ void timing_analysis(Context *ctx, bool print_histogram, bool print_fmax, bool p
} }
} }
void get_criticalities(Context *ctx, NetCriticalityMap *net_crit)
{
CriticalPathMap crit_paths;
net_crit->clear();
Timing timing(ctx, true, true, &crit_paths, nullptr, net_crit);
timing.walk_paths();
}
NEXTPNR_NAMESPACE_END NEXTPNR_NAMESPACE_END

View File

@ -24,6 +24,251 @@
NEXTPNR_NAMESPACE_BEGIN NEXTPNR_NAMESPACE_BEGIN
struct CellPortKey
{
CellPortKey(){};
CellPortKey(IdString cell, IdString port) : cell(cell), port(port){};
explicit CellPortKey(const PortRef &pr)
{
NPNR_ASSERT(pr.cell != nullptr);
cell = pr.cell->name;
port = pr.port;
}
IdString cell, port;
struct Hash
{
inline std::size_t operator()(const CellPortKey &arg) const noexcept
{
std::size_t seed = std::hash<IdString>()(arg.cell);
seed ^= std::hash<IdString>()(arg.port) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
return seed;
}
};
inline bool operator==(const CellPortKey &other) const { return (cell == other.cell) && (port == other.port); }
inline bool operator!=(const CellPortKey &other) const { return (cell != other.cell) || (port != other.port); }
inline bool operator<(const CellPortKey &other) const
{
return cell == other.cell ? port < other.port : cell < other.cell;
}
};
// Identifies a port by its position on a net: either the driver or one of the users (by index).
struct NetPortKey
{
    // Index value reserved to mean "the driver of the net"
    static const size_t DRIVER_IDX = std::numeric_limits<size_t>::max();

    IdString net;
    size_t idx;

    NetPortKey(){};
    // Key for the driver of `net`
    explicit NetPortKey(IdString net) : net(net), idx(DRIVER_IDX){};
    // Key for user number `user` of `net`
    explicit NetPortKey(IdString net, size_t user) : net(net), idx(user){};

    inline bool is_driver() const { return DRIVER_IDX == idx; }
    // User index of this key; asserts that the key does not denote the driver
    inline size_t user_idx() const
    {
        NPNR_ASSERT(idx != DRIVER_IDX);
        return idx;
    }

    // Hash functor combining the hashes of the net name and the index
    struct Hash
    {
        std::size_t operator()(const NetPortKey &arg) const noexcept
        {
            const std::size_t net_hash = std::hash<IdString>()(arg.net);
            const std::size_t idx_hash = std::hash<size_t>()(arg.idx);
            return net_hash ^ (idx_hash + 0x9e3779b9 + (net_hash << 6) + (net_hash >> 2));
        }
    };

    inline bool operator==(const NetPortKey &other) const
    {
        if (!(net == other.net))
            return false;
        return idx == other.idx;
    }
};
// A clock domain, identified by the clock net name and the active clock edge.
struct ClockDomainKey
{
    IdString clock;
    ClockEdge edge;

    ClockDomainKey(IdString clock_net, ClockEdge edge) : clock(clock_net), edge(edge){};
    // probably also need something here to deal with constraints

    // A domain whose clock name is the default (empty) IdString is treated as asynchronous
    inline bool is_async() const { return clock == IdString(); }

    // Hash functor combining the hash of the clock name with the edge (hashed as an int)
    struct Hash
    {
        std::size_t operator()(const ClockDomainKey &arg) const noexcept
        {
            const std::size_t clock_hash = std::hash<IdString>()(arg.clock);
            const std::size_t edge_hash = std::hash<int>()(int(arg.edge));
            return clock_hash ^ (edge_hash + 0x9e3779b9 + (clock_hash << 6) + (clock_hash >> 2));
        }
    };

    inline bool operator==(const ClockDomainKey &other) const
    {
        if (!(clock == other.clock))
            return false;
        return edge == other.edge;
    }
};
// Integer handle used to refer to a clock domain (or a domain pair)
using domain_id_t = int;

// A (launch domain, capture domain) pair, both given as domain IDs.
struct ClockDomainPairKey
{
    domain_id_t launch, capture;

    ClockDomainPairKey(domain_id_t launch, domain_id_t capture) : launch(launch), capture(capture){};

    inline bool operator==(const ClockDomainPairKey &other) const
    {
        if (launch != other.launch)
            return false;
        return capture == other.capture;
    }

    // Hash functor combining the hashes of the launch and capture IDs
    struct Hash
    {
        std::size_t operator()(const ClockDomainPairKey &arg) const noexcept
        {
            const std::size_t launch_hash = std::hash<domain_id_t>()(arg.launch);
            const std::size_t capture_hash = std::hash<domain_id_t>()(arg.capture);
            return launch_hash ^ (capture_hash + 0x9e3779b9 + (launch_hash << 6) + (launch_hash >> 2));
        }
    };
};
struct TimingAnalyser
{
public:
TimingAnalyser(Context *ctx) : ctx(ctx){};
void setup();
void run();
void print_report();
float get_criticality(CellPortKey port) const { return ports.at(port).worst_crit; }
float get_setup_slack(CellPortKey port) const { return ports.at(port).worst_setup_slack; }
float get_domain_setup_slack(CellPortKey port) const
{
delay_t slack = std::numeric_limits<delay_t>::max();
for (const auto &dp : ports.at(port).domain_pairs)
slack = std::min(slack, domain_pairs.at(dp.first).worst_setup_slack);
return slack;
}
bool setup_only = false;
bool verbose_mode = false;
private:
void init_ports();
void get_cell_delays();
void get_route_delays();
void topo_sort();
void setup_port_domains();
void reset_times();
void walk_forward();
void walk_backward();
void compute_slack();
void compute_criticality();
void print_fmax();
// get the N most failing endpoints for a given domain pair
std::vector<CellPortKey> get_failing_eps(domain_id_t domain_pair, int count);
// print the critical path for an endpoint and domain pair
void print_critical_path(CellPortKey endpoint, domain_id_t domain_pair);
const DelayPair init_delay{std::numeric_limits<delay_t>::max(), std::numeric_limits<delay_t>::lowest()};
// Set arrival/required times if more/less than the current value
void set_arrival_time(CellPortKey target, domain_id_t domain, DelayPair arrival, int path_length,
CellPortKey prev = CellPortKey());
void set_required_time(CellPortKey target, domain_id_t domain, DelayPair required, int path_length,
CellPortKey prev = CellPortKey());
// To avoid storing the domain tag structure (which could get large when considering more complex constrained tag
// cases), assign each domain an ID and use that instead
// An arrival or required time entry. Stores both the min/max delays; and the traversal to reach them for critical
// path reporting
struct ArrivReqTime
{
DelayPair value;
CellPortKey bwd_min, bwd_max;
int path_length;
};
// Data per port-domain tuple
struct PortDomainPairData
{
delay_t setup_slack = std::numeric_limits<delay_t>::max(), hold_slack = std::numeric_limits<delay_t>::max();
delay_t budget = std::numeric_limits<delay_t>::max();
int max_path_length = 0;
float criticality = 0;
};
// A cell timing arc, used to cache cell timings and reduce the number of potentially-expensive Arch API calls
struct CellArc
{
enum ArcType
{
COMBINATIONAL,
SETUP,
HOLD,
CLK_TO_Q
} type;
IdString other_port;
DelayQuad value;
// Clock polarity, not used for combinational arcs
ClockEdge edge;
CellArc(ArcType type, IdString other_port, DelayQuad value)
: type(type), other_port(other_port), value(value), edge(RISING_EDGE){};
CellArc(ArcType type, IdString other_port, DelayQuad value, ClockEdge edge)
: type(type), other_port(other_port), value(value), edge(edge){};
};
// Timing data for every cell port
struct PerPort
{
CellPortKey cell_port;
NetPortKey net_port;
PortType type;
// per domain timings
std::unordered_map<domain_id_t, ArrivReqTime> arrival;
std::unordered_map<domain_id_t, ArrivReqTime> required;
std::unordered_map<domain_id_t, PortDomainPairData> domain_pairs;
// cell timing arcs to (outputs)/from (inputs) from this port
std::vector<CellArc> cell_arcs;
// routing delay into this port (input ports only)
DelayPair route_delay;
// worst criticality and slack across domain pairs
float worst_crit;
delay_t worst_setup_slack, worst_hold_slack;
};
struct PerDomain
{
PerDomain(ClockDomainKey key) : key(key){};
ClockDomainKey key;
// these are pairs (signal port; clock port)
std::vector<std::pair<CellPortKey, IdString>> startpoints, endpoints;
};
struct PerDomainPair
{
PerDomainPair(ClockDomainPairKey key) : key(key){};
ClockDomainPairKey key;
DelayPair period;
delay_t worst_setup_slack, worst_hold_slack;
};
CellInfo *cell_info(const CellPortKey &key);
PortInfo &port_info(const CellPortKey &key);
domain_id_t domain_id(IdString cell, IdString clock_port, ClockEdge edge);
domain_id_t domain_id(const NetInfo *net, ClockEdge edge);
domain_id_t domain_pair_id(domain_id_t launch, domain_id_t capture);
void copy_domains(const CellPortKey &from, const CellPortKey &to, bool backwards);
std::unordered_map<CellPortKey, PerPort, CellPortKey::Hash> ports;
std::unordered_map<ClockDomainKey, domain_id_t, ClockDomainKey::Hash> domain_to_id;
std::unordered_map<ClockDomainPairKey, domain_id_t, ClockDomainPairKey::Hash> pair_to_id;
std::vector<PerDomain> domains;
std::vector<PerDomainPair> domain_pairs;
std::vector<CellPortKey> topological_order;
Context *ctx;
};
// Evenly redistribute the total path slack amongst all sinks on each path // Evenly redistribute the total path slack amongst all sinks on each path
void assign_budget(Context *ctx, bool quiet = false); void assign_budget(Context *ctx, bool quiet = false);
@ -32,19 +277,6 @@ void assign_budget(Context *ctx, bool quiet = false);
void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false, void timing_analysis(Context *ctx, bool slack_histogram = true, bool print_fmax = true, bool print_path = false,
bool warn_on_failure = false); bool warn_on_failure = false);
// Per-net results consumed by the timing optimisation algorithm
struct NetCriticalityInfo
{
    // Both vectors hold one entry per user of the net
    std::vector<delay_t> slack;
    std::vector<float> criticality;
    // Longest path length recorded for the net; zero until set
    unsigned max_path_length{0};
    // Worst slack of the clock domain the net was analysed in; starts at the largest delay value
    delay_t cd_worst_slack{std::numeric_limits<delay_t>::max()};
};
// Criticality data for each net, keyed by net name
typedef std::unordered_map<IdString, NetCriticalityInfo> NetCriticalityMap;
// Clears net_crit, then hands it to a Timing path walk to be filled in
void get_criticalities(Context *ctx, NetCriticalityMap *net_crit);
NEXTPNR_NAMESPACE_END NEXTPNR_NAMESPACE_END
#endif #endif

View File

@ -79,16 +79,17 @@ NEXTPNR_NAMESPACE_BEGIN
class TimingOptimiser class TimingOptimiser
{ {
public: public:
TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg){}; TimingOptimiser(Context *ctx, TimingOptCfg cfg) : ctx(ctx), cfg(cfg), tmg(ctx){};
bool optimise() bool optimise()
{ {
log_info("Running timing-driven placement optimisation...\n"); log_info("Running timing-driven placement optimisation...\n");
ctx->lock(); ctx->lock();
if (ctx->verbose) if (ctx->verbose)
timing_analysis(ctx, false, true, false, false); timing_analysis(ctx, false, true, false, false);
tmg.setup();
for (int i = 0; i < 30; i++) { for (int i = 0; i < 30; i++) {
log_info(" Iteration %d...\n", i); log_info(" Iteration %d...\n", i);
get_criticalities(ctx, &net_crit); tmg.run();
setup_delay_limits(); setup_delay_limits();
auto crit_paths = find_crit_paths(0.98, 50000); auto crit_paths = find_crit_paths(0.98, 50000);
for (auto &path : crit_paths) for (auto &path : crit_paths)
@ -109,18 +110,14 @@ class TimingOptimiser
for (auto usr : ni->users) { for (auto usr : ni->users) {
max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max(); max_net_delay[std::make_pair(usr.cell->name, usr.port)] = std::numeric_limits<delay_t>::max();
} }
if (!net_crit.count(net.first))
continue;
auto &nc = net_crit.at(net.first);
if (nc.slack.empty())
continue;
for (size_t i = 0; i < ni->users.size(); i++) { for (size_t i = 0; i < ni->users.size(); i++) {
auto &usr = ni->users.at(i); auto &usr = ni->users.at(i);
delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr); delay_t net_delay = ctx->getNetinfoRouteDelay(ni, usr);
if (nc.max_path_length != 0) { delay_t slack = tmg.get_setup_slack(CellPortKey(usr));
max_net_delay[std::make_pair(usr.cell->name, usr.port)] = delay_t domain_slack = tmg.get_domain_setup_slack(CellPortKey(usr));
net_delay + ((nc.slack.at(i) - nc.cd_worst_slack) / 10); if (slack == std::numeric_limits<delay_t>::max())
} continue;
max_net_delay[std::make_pair(usr.cell->name, usr.port)] = net_delay + ((slack - domain_slack) / 10);
} }
} }
} }
@ -283,12 +280,18 @@ class TimingOptimiser
for (auto net : netnames) { for (auto net : netnames) {
if (crit_nets.size() >= max_count) if (crit_nets.size() >= max_count)
break; break;
if (!net_crit.count(net)) float highest_crit = 0;
continue; size_t crit_user_idx = 0;
auto crit_user = std::max_element(net_crit[net].criticality.begin(), net_crit[net].criticality.end()); NetInfo *ni = ctx->nets.at(net).get();
if (*crit_user > crit_thresh) for (size_t i = 0; i < ni->users.size(); i++) {
crit_nets.push_back( float crit = tmg.get_criticality(CellPortKey(ni->users.at(i)));
std::make_pair(ctx->nets[net].get(), crit_user - net_crit[net].criticality.begin())); if (crit > highest_crit) {
highest_crit = crit;
crit_user_idx = i;
}
}
if (highest_crit > crit_thresh)
crit_nets.push_back(std::make_pair(ni, crit_user_idx));
} }
auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t { auto port_user_index = [](CellInfo *cell, PortInfo &port) -> size_t {
@ -325,8 +328,6 @@ class TimingOptimiser
NetInfo *pn = port.second.net; NetInfo *pn = port.second.net;
if (pn == nullptr) if (pn == nullptr)
continue; continue;
if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty())
continue;
int ccount; int ccount;
DelayQuad combDelay; DelayQuad combDelay;
TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
@ -336,7 +337,7 @@ class TimingOptimiser
if (!is_path) if (!is_path)
continue; continue;
size_t user_idx = port_user_index(cell, port.second); size_t user_idx = port_user_index(cell, port.second);
float usr_crit = net_crit.at(pn->name).criticality.at(user_idx); float usr_crit = tmg.get_criticality(CellPortKey(cell->name, port.first));
if (used_ports.count(&(pn->users.at(user_idx)))) if (used_ports.count(&(pn->users.at(user_idx))))
continue; continue;
if (usr_crit >= max_crit) { if (usr_crit >= max_crit) {
@ -364,8 +365,7 @@ class TimingOptimiser
NetInfo *pn = port.second.net; NetInfo *pn = port.second.net;
if (pn == nullptr) if (pn == nullptr)
continue; continue;
if (!net_crit.count(pn->name) || net_crit.at(pn->name).criticality.empty())
continue;
int ccount; int ccount;
DelayQuad combDelay; DelayQuad combDelay;
TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount); TimingPortClass tpclass = ctx->getPortTimingClass(cell, port.first, ccount);
@ -374,12 +374,12 @@ class TimingOptimiser
bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay); bool is_path = ctx->getCellDelay(cell, fwd_cursor->port, port.first, combDelay);
if (!is_path) if (!is_path)
continue; continue;
auto &crits = net_crit.at(pn->name).criticality; for (size_t i = 0; i < pn->users.size(); i++) {
for (size_t i = 0; i < crits.size(); i++) {
if (used_ports.count(&(pn->users.at(i)))) if (used_ports.count(&(pn->users.at(i))))
continue; continue;
if (crits.at(i) >= max_crit) { float crit = tmg.get_criticality(CellPortKey(pn->users.at(i)));
max_crit = crits.at(i); if (crit >= max_crit) {
max_crit = crit;
crit_sink = std::make_pair(pn, i); crit_sink = std::make_pair(pn, i);
} }
} }
@ -420,12 +420,7 @@ class TimingOptimiser
for (auto port : path) { for (auto port : path) {
if (ctx->debug) { if (ctx->debug) {
float crit = 0; float crit = tmg.get_criticality(CellPortKey(*port));
NetInfo *pn = port->cell->ports.at(port->port).net;
if (net_crit.count(pn->name) && !net_crit.at(pn->name).criticality.empty())
for (size_t i = 0; i < pn->users.size(); i++)
if (pn->users.at(i).cell == port->cell && pn->users.at(i).port == port->port)
crit = net_crit.at(pn->name).criticality.at(i);
log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx), log_info(" %s.%s at %s crit %0.02f\n", port->cell->name.c_str(ctx), port->port.c_str(ctx),
ctx->nameOfBel(port->cell->bel), crit); ctx->nameOfBel(port->cell->bel), crit);
} }
@ -613,10 +608,9 @@ class TimingOptimiser
std::unordered_map<BelId, std::unordered_set<IdString>> bel_candidate_cells; std::unordered_map<BelId, std::unordered_set<IdString>> bel_candidate_cells;
// Map cell ports to net delay limit // Map cell ports to net delay limit
std::unordered_map<std::pair<IdString, IdString>, delay_t> max_net_delay; std::unordered_map<std::pair<IdString, IdString>, delay_t> max_net_delay;
// Criticality data from timing analysis
NetCriticalityMap net_crit;
Context *ctx; Context *ctx;
TimingOptCfg cfg; TimingOptCfg cfg;
TimingAnalyser tmg;
}; };
bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); } bool timing_opt(Context *ctx, TimingOptCfg cfg) { return TimingOptimiser(ctx, cfg).optimise(); }

View File

@ -181,6 +181,98 @@ template <typename ForwardRange> inline auto get_only_value(ForwardRange r)
return get_only_value(b, e); return get_only_value(b, e);
} }
// From Yosys
// https://github.com/YosysHQ/yosys/blob/0fb4224ebca86156a1296b9210116d9a9cbebeed/kernel/utils.h#L131
//
// Depth-first topological sorter. `database` maps every node to the set of nodes that must come
// before it; sort() fills `sorted` and records any cycles it runs into.
template <typename T, typename C = std::less<T>> struct TopoSort
{
    bool analyze_loops, found_loops;
    std::map<T, std::set<T, C>, C> database;
    std::set<std::set<T, C>> loops;
    std::vector<T> sorted;

    TopoSort() : analyze_loops(true), found_loops(false) {}

    // Register a node; a node that is already known keeps its existing predecessor set
    void node(T n)
    {
        if (database.find(n) == database.end())
            database.emplace(n, std::set<T, C>());
    }

    // Record that `left` has to be ordered before `right`
    void edge(T left, T right)
    {
        node(left);
        database[right].insert(left);
    }

    // Visit `n` depth-first: emit all of its predecessors, then `n` itself.
    // `active_cells` holds the nodes on the current DFS path; meeting one of them again means a cycle.
    void sort_worker(const T &n, std::set<T, C> &marked_cells, std::set<T, C> &active_cells,
                     std::vector<T> &active_stack)
    {
        if (active_cells.find(n) != active_cells.end()) {
            // Back edge: note the cycle and, if requested, collect its members from the DFS stack
            found_loops = true;
            if (!analyze_loops)
                return;
            std::set<T, C> loop;
            for (auto it = active_stack.rbegin(); it != active_stack.rend(); ++it) {
                loop.insert(*it);
                if (*it == n)
                    break;
            }
            loops.insert(loop);
            return;
        }

        // Already emitted on an earlier visit
        if (marked_cells.find(n) != marked_cells.end())
            return;

        const std::set<T, C> &predecessors = database.at(n);
        if (!predecessors.empty()) {
            if (analyze_loops)
                active_stack.push_back(n);
            active_cells.insert(n);

            for (const T &pred : predecessors)
                sort_worker(pred, marked_cells, active_cells, active_stack);

            if (analyze_loops)
                active_stack.pop_back();
            active_cells.erase(n);
        }

        marked_cells.insert(n);
        sorted.push_back(n);
    }

    // Sort every node in `database`; returns true when no cycle was found
    bool sort()
    {
        found_loops = false;
        sorted.clear();
        loops.clear();

        std::set<T, C> marked_cells, active_cells;
        std::vector<T> active_stack;

        for (const auto &entry : database)
            sort_worker(entry.first, marked_cells, active_cells, active_stack);

        NPNR_ASSERT(sorted.size() == database.size());
        return !found_loops;
    }
};
// Adaptor that lets a range-based for loop walk a container back to front.
// It only keeps a reference, so the container must outlive the adaptor.
template <typename T> struct reversed_range_t
{
    T &obj;

    explicit reversed_range_t(T &container) : obj(container) {}

    // begin()/end() forward to the container's reverse iterators
    auto begin() { return obj.rbegin(); }
    auto end() { return obj.rend(); }
};

// Convenience factory: `for (auto &x : reversed_range(vec))`
template <typename T> reversed_range_t<T> reversed_range(T &obj)
{
    return reversed_range_t<T>{obj};
}
NEXTPNR_NAMESPACE_END NEXTPNR_NAMESPACE_END
#endif #endif

View File

@ -95,8 +95,8 @@ bool Arch::isBelLocationValid(BelId bel) const
void Arch::permute_luts() void Arch::permute_luts()
{ {
NetCriticalityMap nc; TimingAnalyser tmg(getCtx());
get_criticalities(getCtx(), &nc); tmg.setup();
std::unordered_map<PortInfo *, size_t> port_to_user; std::unordered_map<PortInfo *, size_t> port_to_user;
for (auto net : sorted(nets)) { for (auto net : sorted(nets)) {
@ -121,13 +121,7 @@ void Arch::permute_luts()
ci->ports[port_names.at(i)].type = PORT_IN; ci->ports[port_names.at(i)].type = PORT_IN;
} }
auto &port = ci->ports.at(port_names.at(i)); auto &port = ci->ports.at(port_names.at(i));
float crit = 0; float crit = (port.net == nullptr) ? 0 : tmg.get_criticality(CellPortKey(ci->name, port_names.at(i)));
if (port.net != nullptr && nc.count(port.net->name)) {
auto &n = nc.at(port.net->name);
size_t usr = port_to_user.at(&port);
if (usr < n.criticality.size())
crit = n.criticality.at(usr);
}
orig_nets.push_back(port.net); orig_nets.push_back(port.net);
inputs.emplace_back(crit, i); inputs.emplace_back(crit, i);
} }

View File

@ -28,9 +28,9 @@ NEXTPNR_NAMESPACE_BEGIN
struct NexusPostPlaceOpt struct NexusPostPlaceOpt
{ {
Context *ctx; Context *ctx;
NetCriticalityMap net_crit; TimingAnalyser tmg;
NexusPostPlaceOpt(Context *ctx) : ctx(ctx){}; NexusPostPlaceOpt(Context *ctx) : ctx(ctx), tmg(ctx){};
inline bool is_constrained(CellInfo *cell) inline bool is_constrained(CellInfo *cell)
{ {
@ -139,7 +139,7 @@ struct NexusPostPlaceOpt
void operator()() void operator()()
{ {
get_criticalities(ctx, &net_crit); tmg.setup();
opt_lutffs(); opt_lutffs();
} }