From 8d0f52fbf9b0bf4356ad342d8cc3b5fa6e423520 Mon Sep 17 00:00:00 2001 From: Rowan Goemans Date: Wed, 11 Sep 2024 08:23:46 +0200 Subject: [PATCH 1/2] timing: Move towards DelayPairs for timing reporting (#1359) --- common/kernel/nextpnr_types.h | 47 +++++++++++++++++++++++++++++------ common/kernel/report.cc | 25 +++++++++++++------ common/kernel/timing.cc | 22 +++++++++------- common/kernel/timing.h | 18 ++++++-------- common/kernel/timing_log.cc | 40 ++++++++++++++++++----------- 5 files changed, 102 insertions(+), 50 deletions(-) diff --git a/common/kernel/nextpnr_types.h b/common/kernel/nextpnr_types.h index 163ad2f2..ec1ac111 100644 --- a/common/kernel/nextpnr_types.h +++ b/common/kernel/nextpnr_types.h @@ -80,7 +80,7 @@ struct PortRef // minimum and maximum delay struct DelayPair { - DelayPair(){}; + DelayPair() : min_delay(0), max_delay(0) {}; explicit DelayPair(delay_t delay) : min_delay(delay), max_delay(delay) {} DelayPair(delay_t min_delay, delay_t max_delay) : min_delay(min_delay), max_delay(max_delay) {} delay_t minDelay() const { return min_delay; } @@ -94,13 +94,25 @@ struct DelayPair { return {min_delay - other.min_delay, max_delay - other.max_delay}; } + DelayPair &operator+=(const DelayPair &rhs) + { + min_delay += rhs.min_delay; + max_delay += rhs.max_delay; + return *this; + } + DelayPair &operator-=(const DelayPair &rhs) + { + min_delay -= rhs.min_delay; + max_delay -= rhs.max_delay; + return *this; + } }; // four-quadrant, min and max rise and fall delay struct DelayQuad { DelayPair rise, fall; - DelayQuad() {} + DelayQuad() : rise(0), fall(0) {} explicit DelayQuad(delay_t delay) : rise(delay), fall(delay) {} DelayQuad(delay_t min_delay, delay_t max_delay) : rise(min_delay, max_delay), fall(min_delay, max_delay) {} DelayQuad(DelayPair rise, DelayPair fall) : rise(rise), fall(fall) {} @@ -120,6 +132,19 @@ struct DelayQuad DelayQuad operator+(const DelayQuad &other) const { return {rise + other.rise, fall + other.fall}; } DelayQuad 
operator-(const DelayQuad &other) const { return {rise - other.rise, fall - other.fall}; } + DelayQuad &operator+=(const DelayQuad &rhs) + { + rise += rhs.rise; + fall += rhs.fall; + return *this; + } + + DelayQuad &operator-=(const DelayQuad &rhs) + { + rise -= rhs.rise; + fall -= rhs.fall; + return *this; + } }; struct ClockConstraint; @@ -200,7 +225,7 @@ struct PseudoCell virtual bool getDelay(IdString fromPort, IdString toPort, DelayQuad &delay) const = 0; virtual TimingPortClass getPortTimingClass(IdString port, int &clockInfoCount) const = 0; virtual TimingClockingInfo getPortClockingInfo(IdString port, int index) const = 0; - virtual ~PseudoCell(){}; + virtual ~PseudoCell() {}; }; struct RegionPlug : PseudoCell @@ -336,15 +361,18 @@ struct CriticalPath // To cell.port std::pair to; // Segment delay - delay_t delay; + DelayPair delay; }; // Clock pair ClockPair clock_pair; // Total path delay - delay_t delay; - // Period (max allowed delay) - delay_t period; + DelayPair delay; + + // if delay.minDelay() < bound.minDelay() then this is a hold violation + // if delay.maxDelay() > bound.maxDelay() then this is a setup violation + DelayPair bound; + // Individual path segments std::vector segments; }; @@ -357,7 +385,7 @@ struct NetSinkTiming // Cell and port (the sink) std::pair cell_port; // Delay - delay_t delay; + DelayPair delay; }; struct TimingResult @@ -379,6 +407,9 @@ struct TimingResult // Histogram of slack dict slack_histogram; + + // TODO: Hold time violations + // dict hold_violations; }; // Represents the contents of a non-leaf cell in a design diff --git a/common/kernel/report.cc b/common/kernel/report.cc index 5fa51b6f..917740b9 100644 --- a/common/kernel/report.cc +++ b/common/kernel/report.cc @@ -73,11 +73,11 @@ static Json::array json_report_critical_paths(const Context *ctx) {"port", segment.to.second.c_str(ctx)}, {"loc", Json::array({toLoc.x, toLoc.y})}}); - auto segmentJson = Json::object({ - {"delay", ctx->getDelayNS(segment.delay)}, - 
{"from", fromJson}, - {"to", toJson}, - }); + auto minDelay = ctx->getDelayNS(segment.delay.minDelay()); + auto maxDelay = ctx->getDelayNS(segment.delay.maxDelay()); + + auto segmentJson = + Json::object({{"delay", Json::array({minDelay, maxDelay})}, {"from", fromJson}, {"to", toJson}}); if (segment.type == CriticalPath::Segment::Type::CLK_TO_Q) { segmentJson["type"] = "clk-to-q"; @@ -130,10 +130,13 @@ static Json::array json_report_detailed_net_timings(const Context *ctx) Json::array endpointsJson; for (const auto &sink_timing : it.second) { + auto minDelay = ctx->getDelayNS(sink_timing.delay.minDelay()); + auto maxDelay = ctx->getDelayNS(sink_timing.delay.maxDelay()); + auto endpointJson = Json::object({{"cell", sink_timing.cell_port.first.c_str(ctx)}, {"port", sink_timing.cell_port.second.c_str(ctx)}, {"event", clock_event_name(ctx, sink_timing.clock_pair.end)}, - {"delay", ctx->getDelayNS(sink_timing.delay)}}); + {"delay", Json::array({minDelay, maxDelay})}}); endpointsJson.push_back(endpointJson); } @@ -191,7 +194,10 @@ Report JSON structure: }, "type": , "net": , - "delay": , + "delay": [ + , + , + ], } ... ] @@ -209,7 +215,10 @@ Report JSON structure: "cell": , "port": , "event": , - "delay": , + "delay": [ + , + , + ], } ... 
] diff --git a/common/kernel/timing.cc b/common/kernel/timing.cc index 7256ed1b..105d20f9 100644 --- a/common/kernel/timing.cc +++ b/common/kernel/timing.cc @@ -502,6 +502,8 @@ void TimingAnalyser::identify_related_domains() void TimingAnalyser::reset_times() { + static const auto init_delay = + DelayPair(std::numeric_limits<delay_t>::max(), std::numeric_limits<delay_t>::lowest()); for (auto &port : ports) { auto do_reset = [&](dict<domain_id_t, ArrivReqTime> &times) { for (auto &t : times) { @@ -758,7 +760,7 @@ void TimingAnalyser::build_detailed_net_timing_report() sink_timing.clock_pair.end.clock = capture.clock; sink_timing.clock_pair.end.edge = capture.edge; sink_timing.cell_port = std::make_pair(pd.cell_port.cell, pd.cell_port.port); - sink_timing.delay = arr.second.value.max_delay; + sink_timing.delay = arr.second.value; net_timings[net->name].push_back(sink_timing); } @@ -802,23 +804,25 @@ CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, auto &launch = domains.at(dp.key.launch).key; auto &capture = domains.at(dp.key.capture).key; + report.delay = DelayPair(0); + report.clock_pair.start.clock = launch.clock; report.clock_pair.start.edge = launch.edge; report.clock_pair.end.clock = capture.clock; report.clock_pair.end.edge = capture.edge; - report.period = ctx->getDelayFromNS(1.0e9 / ctx->setting<float>("target_freq")); + report.bound = DelayPair(0, ctx->getDelayFromNS(1.0e9 / ctx->setting<float>("target_freq"))); if (launch.edge != capture.edge) { - report.period = report.period / 2; + report.bound.max_delay = report.bound.max_delay / 2; } if (!launch.is_async() && ctx->nets.at(launch.clock)->clkconstr) { if (launch.edge == capture.edge) { - report.period = ctx->nets.at(launch.clock)->clkconstr->period.minDelay(); + report.bound.max_delay = ctx->nets.at(launch.clock)->clkconstr->period.minDelay(); } else if (capture.edge == RISING_EDGE) { - report.period = ctx->nets.at(launch.clock)->clkconstr->low.minDelay(); + report.bound.max_delay = 
ctx->nets.at(launch.clock)->clkconstr->low.minDelay(); } else if (capture.edge == FALLING_EDGE) { - report.period = ctx->nets.at(launch.clock)->clkconstr->high.minDelay(); + report.bound.max_delay = ctx->nets.at(launch.clock)->clkconstr->high.minDelay(); } } @@ -895,13 +899,13 @@ CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, seg_logic.type = CriticalPath::Segment::Type::LOGIC; } - seg_logic.delay = comb_delay.maxDelay(); + seg_logic.delay = comb_delay.delayPair(); seg_logic.from = std::make_pair(last_cell->name, last_port); seg_logic.to = std::make_pair(driver_cell->name, driver.port); seg_logic.net = IdString(); report.segments.push_back(seg_logic); - auto net_delay = ctx->getNetinfoRouteDelay(net, sink); + auto net_delay = DelayPair(ctx->getNetinfoRouteDelay(net, sink)); CriticalPath::Segment seg_route; seg_route.type = CriticalPath::Segment::Type::ROUTING; @@ -919,7 +923,7 @@ CriticalPath TimingAnalyser::build_critical_path_report(domain_id_t domain_pair, auto sinkClass = ctx->getPortTimingClass(crit_path.back().cell, crit_path.back().port, clockCount); if (sinkClass == TMG_REGISTER_INPUT && clockCount > 0) { auto sinkClockInfo = ctx->getPortClockingInfo(crit_path.back().cell, crit_path.back().port, 0); - delay_t setup = sinkClockInfo.setup.maxDelay(); + auto setup = sinkClockInfo.setup; CriticalPath::Segment seg_logic; seg_logic.type = CriticalPath::Segment::Type::SETUP; diff --git a/common/kernel/timing.h b/common/kernel/timing.h index 3dabe4f1..dae3ba27 100644 --- a/common/kernel/timing.h +++ b/common/kernel/timing.h @@ -27,8 +27,8 @@ NEXTPNR_NAMESPACE_BEGIN struct CellPortKey { - CellPortKey(){}; - CellPortKey(IdString cell, IdString port) : cell(cell), port(port){}; + CellPortKey() {}; + CellPortKey(IdString cell, IdString port) : cell(cell), port(port) {}; explicit CellPortKey(const PortRef &pr) { NPNR_ASSERT(pr.cell != nullptr); @@ -49,7 +49,7 @@ struct ClockDomainKey { IdString clock; ClockEdge edge; - 
ClockDomainKey(IdString clock_net, ClockEdge edge) : clock(clock_net), edge(edge){}; + ClockDomainKey(IdString clock_net, ClockEdge edge) : clock(clock_net), edge(edge) {}; // probably also need something here to deal with constraints inline bool is_async() const { return clock == IdString(); } @@ -63,7 +63,7 @@ typedef int domain_id_t; struct ClockDomainPairKey { domain_id_t launch, capture; - ClockDomainPairKey(domain_id_t launch, domain_id_t capture) : launch(launch), capture(capture){}; + ClockDomainPairKey(domain_id_t launch, domain_id_t capture) : launch(launch), capture(capture) {}; inline bool operator==(const ClockDomainPairKey &other) const { return (launch == other.launch) && (capture == other.capture); @@ -128,8 +128,6 @@ struct TimingAnalyser // get the N worst endpoints for a given domain pair std::vector get_worst_eps(domain_id_t domain_pair, int count); - const DelayPair init_delay{std::numeric_limits::max(), std::numeric_limits::lowest()}; - // Set arrival/required times if more/less than the current value void set_arrival_time(CellPortKey target, domain_id_t domain, DelayPair arrival, int path_length, CellPortKey prev = CellPortKey()); @@ -174,9 +172,9 @@ struct TimingAnalyser ClockEdge edge; CellArc(ArcType type, IdString other_port, DelayQuad value) - : type(type), other_port(other_port), value(value), edge(RISING_EDGE){}; + : type(type), other_port(other_port), value(value), edge(RISING_EDGE) {}; CellArc(ArcType type, IdString other_port, DelayQuad value, ClockEdge edge) - : type(type), other_port(other_port), value(value), edge(edge){}; + : type(type), other_port(other_port), value(value), edge(edge) {}; }; // Timing data for every cell port @@ -200,7 +198,7 @@ struct TimingAnalyser struct PerDomain { - PerDomain(ClockDomainKey key) : key(key){}; + PerDomain(ClockDomainKey key) : key(key) {}; ClockDomainKey key; // these are pairs (signal port; clock port) std::vector> startpoints, endpoints; @@ -208,7 +206,7 @@ struct TimingAnalyser struct 
PerDomainPair { - PerDomainPair(ClockDomainPairKey key) : key(key){}; + PerDomainPair(ClockDomainPairKey key) : key(key) {}; ClockDomainPairKey key; DelayPair period{0}; delay_t worst_setup_slack, worst_hold_slack; diff --git a/common/kernel/timing_log.cc b/common/kernel/timing_log.cc index 1bc1e116..24aac665 100644 --- a/common/kernel/timing_log.cc +++ b/common/kernel/timing_log.cc @@ -68,7 +68,18 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) // A helper function for reporting one critical path auto print_path_report = [ctx](const CriticalPath &path) { - delay_t total = 0, logic_total = 0, route_total = 0; + DelayPair total(0), logic_total(0), route_total(0); + + // We print out the max delay since that's usually the interesting case + // But if we know this critical path has violated hold time we print the + // min delay instead + bool hold_violation = path.delay.minDelay() < path.bound.minDelay(); + auto get_delay_ns = [hold_violation, ctx](const DelayPair &d) { + if (hold_violation) { + return ctx->getDelayNS(d.minDelay()); + } + return ctx->getDelayNS(d.maxDelay()); + }; log_info("curr total\n"); for (const auto &segment : path.segments) { @@ -83,10 +94,10 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) const std::string type_name = (segment.type == CriticalPath::Segment::Type::SETUP) ? 
"Setup" : "Source"; - log_info("%4.1f %4.1f %s %s.%s\n", ctx->getDelayNS(segment.delay), ctx->getDelayNS(total), - type_name.c_str(), segment.to.first.c_str(ctx), segment.to.second.c_str(ctx)); + log_info("%4.1f %4.1f %s %s.%s\n", get_delay_ns(segment.delay), get_delay_ns(total), type_name.c_str(), + segment.to.first.c_str(ctx), segment.to.second.c_str(ctx)); } else if (segment.type == CriticalPath::Segment::Type::ROUTING) { - route_total += segment.delay; + route_total = route_total + segment.delay; const auto &driver = ctx->cells.at(segment.from.first); const auto &sink = ctx->cells.at(segment.to.first); @@ -94,9 +105,8 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) auto driver_loc = ctx->getBelLocation(driver->bel); auto sink_loc = ctx->getBelLocation(sink->bel); - log_info("%4.1f %4.1f Net %s (%d,%d) -> (%d,%d)\n", ctx->getDelayNS(segment.delay), - ctx->getDelayNS(total), segment.net.c_str(ctx), driver_loc.x, driver_loc.y, sink_loc.x, - sink_loc.y); + log_info("%4.1f %4.1f Net %s (%d,%d) -> (%d,%d)\n", get_delay_ns(segment.delay), get_delay_ns(total), + segment.net.c_str(ctx), driver_loc.x, driver_loc.y, sink_loc.x, sink_loc.y); log_info(" Sink %s.%s\n", segment.to.first.c_str(ctx), segment.to.second.c_str(ctx)); const NetInfo *net = ctx->nets.at(segment.net).get(); @@ -134,7 +144,7 @@ static void log_crit_paths(const Context *ctx, TimingResult &result) } } } - log_info("%.1f ns logic, %.1f ns routing\n", ctx->getDelayNS(logic_total), ctx->getDelayNS(route_total)); + log_info("%.1f ns logic, %.1f ns routing\n", get_delay_ns(logic_total), get_delay_ns(route_total)); }; // Single domain paths @@ -223,7 +233,7 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) continue; } - delay_t path_delay = 0; + DelayPair path_delay(0); for (const auto &segment : report.segments) { path_delay += segment.delay; } @@ -232,13 +242,13 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) // result 
is negative then only the latter matters. Otherwise // the compensated path delay is taken. auto clock_delay = result.clock_delays.at(key); - path_delay -= clock_delay; + path_delay -= DelayPair(clock_delay); float fmax = std::numeric_limits::infinity(); - if (path_delay < 0) { + if (path_delay.maxDelay() < 0) { fmax = 1e3f / ctx->getDelayNS(clock_delay); - } else if (path_delay > 0) { - fmax = 1e3f / ctx->getDelayNS(path_delay); + } else if (path_delay.maxDelay() > 0) { + fmax = 1e3f / ctx->getDelayNS(path_delay.maxDelay()); } // Both clocks are related so they should have the same @@ -306,12 +316,12 @@ static void log_fmax(Context *ctx, TimingResult &result, bool warn_on_failure) for (auto &report : result.xclock_paths) { const ClockEvent &a = report.clock_pair.start; const ClockEvent &b = report.clock_pair.end; - delay_t path_delay = 0; + DelayPair path_delay(0); for (const auto &segment : report.segments) { path_delay += segment.delay; } auto ev_a = clock_event_name(ctx, a, start_field_width), ev_b = clock_event_name(ctx, b, end_field_width); - log_info("Max delay %s -> %s: %0.02f ns\n", ev_a.c_str(), ev_b.c_str(), ctx->getDelayNS(path_delay)); + log_info("Max delay %s -> %s: %0.02f ns\n", ev_a.c_str(), ev_b.c_str(), ctx->getDelayNS(path_delay.maxDelay())); } log_break(); } From ff7b8535bc78a1253b053ea5d7ce84065a39bcaf Mon Sep 17 00:00:00 2001 From: YRabbit Date: Wed, 11 Sep 2024 19:18:26 +1000 Subject: [PATCH 2/2] Gowin. Add DHCEN primitive. (#1349) * Gowin. Add DHCEN primitive. This primitive allows you to dynamically turn off and turn on the networks of high-speed clocks. This is done tracking the routes to the sinks and if the route passes through a special HCLK MUX (this may be the input MUX or the output MUX, as well as the interbank MUX), then the control signal of this MUX is used. Signed-off-by: YRabbit * Gowin. 
Change the DHCEN binding Use the entire PIP instead of a wire - avoids normalisation and may also be useful in the future when calculating clock stuff. Signed-off-by: YRabbit --------- Signed-off-by: YRabbit --- himbaechel/uarch/gowin/constids.inc | 4 +- himbaechel/uarch/gowin/globals.cc | 133 +++++++++++++++++++---- himbaechel/uarch/gowin/gowin.h | 19 +++- himbaechel/uarch/gowin/gowin_arch_gen.py | 50 ++++++++- himbaechel/uarch/gowin/gowin_utils.cc | 16 +++ himbaechel/uarch/gowin/gowin_utils.h | 1 + himbaechel/uarch/gowin/pack.cc | 33 ++++++ 7 files changed, 229 insertions(+), 27 deletions(-) diff --git a/himbaechel/uarch/gowin/constids.inc b/himbaechel/uarch/gowin/constids.inc index 62a40360..e8e6874a 100644 --- a/himbaechel/uarch/gowin/constids.inc +++ b/himbaechel/uarch/gowin/constids.inc @@ -1269,10 +1269,12 @@ X(BUFG) X(CLOCK) X(DQCE) X(DCS) -X(DCS_MODE) X(DQCE_PIP) +X(DHCEN_USED) X(DCS_USED) X(SELFORCE) +X(DHCEN) +X(DCS_MODE) //HCLK Bels X(CLKDIV) diff --git a/himbaechel/uarch/gowin/globals.cc b/himbaechel/uarch/gowin/globals.cc index e510174b..4e1f8f71 100644 --- a/himbaechel/uarch/gowin/globals.cc +++ b/himbaechel/uarch/gowin/globals.cc @@ -134,7 +134,8 @@ struct GowinGlobalRouter // Dedicated backwards BFS routing for global networks template - bool backwards_bfs_route(NetInfo *net, WireId src, WireId dst, int iter_limit, bool strict, Tfilt pip_filter) + bool backwards_bfs_route(NetInfo *net, WireId src, WireId dst, int iter_limit, bool strict, Tfilt pip_filter, + std::vector *path = nullptr) { // Queue of wires to visit std::queue visit; @@ -208,6 +209,9 @@ struct GowinGlobalRouter break; } ctx->bindPip(pip, net, STRENGTH_LOCKED); + if (path != nullptr) { + path->push_back(pip); + } } return true; } else { @@ -225,6 +229,7 @@ struct GowinGlobalRouter bool driver_is_buf(const PortRef &driver) { return CellTypePort(driver) == CellTypePort(id_BUFG, id_O); } bool driver_is_dqce(const PortRef &driver) { return CellTypePort(driver) == CellTypePort(id_DQCE, 
id_CLKOUT); } bool driver_is_dcs(const PortRef &driver) { return CellTypePort(driver) == CellTypePort(id_DCS, id_CLKOUT); } + bool driver_is_dhcen(const PortRef &driver) { return CellTypePort(driver) == CellTypePort(id_DHCEN, id_CLKOUT); } bool driver_is_clksrc(const PortRef &driver) { // dedicated pins @@ -276,7 +281,9 @@ struct GowinGlobalRouter ROUTED_ALL }; - RouteResult route_direct_net(NetInfo *net, WireId aux_src = WireId(), bool DCS_pips = false, bool DQCE_pips = false) + template + RouteResult route_direct_net(NetInfo *net, Tfilter pip_filter, WireId aux_src = WireId(), + std::vector *path = nullptr) { WireId src; src = aux_src == WireId() ? ctx->getNetinfoSourceWire(net) : aux_src; @@ -297,21 +304,9 @@ struct GowinGlobalRouter ctx->nameOf(usr.port)); } bool bfs_res; - if (DCS_pips) { - bfs_res = backwards_bfs_route(net, src, dst, 1000000, false, [&](PipId pip) { - return (is_relaxed_sink(usr) || global_DCS_pip_filter(pip)); - }); - } else { - if (DQCE_pips) { - bfs_res = backwards_bfs_route(net, src, dst, 1000000, false, [&](PipId pip) { - return (is_relaxed_sink(usr) || global_DQCE_pip_filter(pip)); - }); - } else { - bfs_res = backwards_bfs_route(net, src, dst, 1000000, false, [&](PipId pip) { - return (is_relaxed_sink(usr) || global_pip_filter(pip)); - }); - } - } + bfs_res = backwards_bfs_route( + net, src, dst, 1000000, false, [&](PipId pip) { return (is_relaxed_sink(usr) || pip_filter(pip)); }, + path); if (bfs_res) { routed = routed == ROUTED_PARTIALLY ? 
routed : ROUTED_ALL; } else { @@ -345,7 +340,8 @@ struct GowinGlobalRouter src = ctx->getBelPinWire(driver.cell->bel, driver.port); } - RouteResult route_result = route_direct_net(net, src, false, true); + RouteResult route_result = route_direct_net( + net, [&](PipId pip) { return global_DQCE_pip_filter(pip); }, src); if (route_result == NOT_ROUTED) { log_error("Can't route the %s network.\n", ctx->nameOf(net)); } @@ -422,7 +418,8 @@ struct GowinGlobalRouter src = ctx->getBelPinWire(driver.cell->bel, driver.port); } - RouteResult route_result = route_direct_net(net, src, true); + RouteResult route_result = route_direct_net( + net, [&](PipId pip) { return global_DCS_pip_filter(pip); }, src); if (route_result == NOT_ROUTED) { log_error("Can't route the %s network.\n", ctx->nameOf(net)); } @@ -487,6 +484,84 @@ struct GowinGlobalRouter ctx->cells.erase(dcs_ci->name); } + void route_dhcen_net(NetInfo *net) + { + // route net after dhcen source of CLKIN net + CellInfo *dhcen_ci = net->driver.cell; + + NetInfo *net_before_dhcen = dhcen_ci->getPort(id_CLKIN); + NPNR_ASSERT(net_before_dhcen != nullptr); + + PortRef driver = net_before_dhcen->driver; + NPNR_ASSERT_MSG(driver_is_buf(driver) || driver_is_clksrc(driver), + stringf("The input source for %s is not a clock.", ctx->nameOf(dhcen_ci)).c_str()); + + IdString port; + // use BUF input if there is one + if (driver_is_buf(driver)) { + port = id_I; + } else { + port = driver.port; + } + WireId src = ctx->getBelPinWire(driver.cell->bel, port); + + std::vector path; + RouteResult route_result = route_direct_net( + net, [&](PipId pip) { return global_pip_filter(pip); }, src, &path); + if (route_result == NOT_ROUTED) { + log_error("Can't route the %s network.\n", ctx->nameOf(net)); + } + if (route_result == ROUTED_PARTIALLY) { + log_error("It was not possible to completely route the %s net using only global resources. 
This is not " + "allowed for dhcen managed networks.\n", + ctx->nameOf(net)); + } + + // In networks controlled by dhcen we disable/enable only HCLK - if + // there are ordinary cells among the sinks, then they are not affected + // by this primitive. + for (PipId pip : path) { + // move to upper level net + ctx->unbindPip(pip); + ctx->bindPip(pip, net_before_dhcen, STRENGTH_LOCKED); + + WireId dst = ctx->getPipDstWire(pip); + IdString side; + BelId dhcen_bel = gwu.get_dhcen_bel(dst, side); + if (dhcen_bel == BelId()) { + continue; + } + + // One pseudo dhcen can be implemented as several hardware dhcen. + // Here we find suitable hardware dhcens. + CellInfo *hw_dhcen = ctx->getBoundBelCell(dhcen_bel); + if (ctx->debug) { + log_info(" use %s wire and %s bel for '%s' hw cell.\n", ctx->nameOfWire(dst), + ctx->nameOfBel(dhcen_bel), ctx->nameOf(hw_dhcen)); + } + + // The control network must connect the CE inputs of all hardware dhcens. + hw_dhcen->setAttr(id_DHCEN_USED, 1); + dhcen_ci->copyPortTo(id_CE, hw_dhcen, id_CE); + } + + // connect all users to upper level net + std::vector users; + for (auto &cell_port : net->users) { + users.push_back(cell_port); + } + for (PortRef &user : users) { + user.cell->disconnectPort(user.port); + user.cell->connectPort(user.port, net_before_dhcen); + } + + // remove the virtual dhcen + dhcen_ci->disconnectPort(id_CLKOUT); + dhcen_ci->disconnectPort(id_CLKIN); + dhcen_ci->disconnectPort(id_CE); + ctx->cells.erase(dhcen_ci->name); + } + void route_buffered_net(NetInfo *net) { // a) route net after buf using the buf input as source @@ -496,7 +571,8 @@ struct GowinGlobalRouter NetInfo *net_before_buf = buf_ci->getPort(id_I); NPNR_ASSERT(net_before_buf != nullptr); - RouteResult route_result = route_direct_net(net, src); + RouteResult route_result = route_direct_net( + net, [&](PipId pip) { return global_pip_filter(pip); }, src); if (route_result == NOT_ROUTED || route_result == ROUTED_PARTIALLY) { log_error("Can't route the %s net. 
It might be worth removing the BUFG buffer flag.\n", ctx->nameOf(net)); } @@ -516,7 +592,7 @@ struct GowinGlobalRouter void route_clk_net(NetInfo *net) { - RouteResult route_result = route_direct_net(net); + RouteResult route_result = route_direct_net(net, [&](PipId pip) { return global_pip_filter(pip); }); if (route_result != NOT_ROUTED) { log_info(" '%s' net was routed using global resources %s.\n", ctx->nameOf(net), route_result == ROUTED_ALL ? "only" : "partially"); @@ -527,7 +603,7 @@ struct GowinGlobalRouter { log_info("Routing globals...\n"); - std::vector dqce_nets, dcs_nets, buf_nets, clk_nets; + std::vector dhcen_nets, dqce_nets, dcs_nets, buf_nets, clk_nets; // Determining the priority of network routing for (auto &net : ctx->nets) { @@ -550,12 +626,25 @@ struct GowinGlobalRouter } else { if (driver_is_dcs(ni->driver)) { dcs_nets.push_back(net.first); + } else { + if (driver_is_dhcen(ni->driver)) { + dhcen_nets.push_back(net.first); + } } } } } } + // nets with DHCEN + for (IdString net_name : dhcen_nets) { + NetInfo *ni = ctx->nets.at(net_name).get(); + if (ctx->verbose) { + log_info("route dhcen net '%s'\n", ctx->nameOf(ni)); + } + route_dhcen_net(ni); + } + // nets with DQCE for (IdString net_name : dqce_nets) { NetInfo *ni = ctx->nets.at(net_name).get(); diff --git a/himbaechel/uarch/gowin/gowin.h b/himbaechel/uarch/gowin/gowin.h index 81c825f4..deb70eec 100644 --- a/himbaechel/uarch/gowin/gowin.h +++ b/himbaechel/uarch/gowin/gowin.h @@ -116,12 +116,23 @@ NPNR_PACKED_STRUCT(struct Spine_bel_POD { int32_t bel_z; }); +NPNR_PACKED_STRUCT(struct Wire_bel_POD { + int32_t pip_xy; + int32_t pip_dst; + int32_t pip_src; + int32_t bel_x; + int32_t bel_y; + int32_t bel_z; + int32_t side; +}); + NPNR_PACKED_STRUCT(struct Extra_chip_data_POD { int32_t chip_flags; Bottom_io_POD bottom_io; RelSlice diff_io_types; RelSlice dqce_bels; RelSlice dcs_bels; + RelSlice dhcen_bels; // chip flags static constexpr int32_t HAS_SP32 = 1; static constexpr int32_t NEED_SP_FIX = 
2; @@ -162,10 +173,12 @@ enum VSS_Z = 278, BANDGAP_Z = 279, - DQCE_Z = 280, // : 286 reserve for 6 DQCEs - DCS_Z = 286, // : 288 reserve for 2 DCSs - USERFLASH_Z = 288, + DQCE_Z = 280, // : 286 reserve for 6 DQCEs + DCS_Z = 286, // : 288 reserve for 2 DCSs + DHCEN_Z = 288, // : 298 + + USERFLASH_Z = 298, // The two least significant bits encode Z for 9-bit adders and // multipliers, if they are equal to 0, then we get Z of their common diff --git a/himbaechel/uarch/gowin/gowin_arch_gen.py b/himbaechel/uarch/gowin/gowin_arch_gen.py index 425d9171..d8aa303e 100644 --- a/himbaechel/uarch/gowin/gowin_arch_gen.py +++ b/himbaechel/uarch/gowin/gowin_arch_gen.py @@ -51,8 +51,10 @@ BANDGAP_Z = 279 DQCE_Z = 280 # : 286 reserve for 6 DQCEs DCS_Z = 286 # : 288 reserve for 2 DCSs +DHCEN_Z = 288 # : 298 + +USERFLASH_Z = 298 -USERFLASH_Z = 288 DSP_Z = 509 @@ -167,6 +169,28 @@ class SpineBel(BBAStruct): bba.u32(self.bel_y) bba.u32(self.bel_z) +# wire -> bel for DHCEN bels +@dataclass +class WireBel(BBAStruct): + pip_xy: IdString + pip_dst: IdString + pip_src: IdString + bel_x: int + bel_y: int + bel_z: int + hclk_side: IdString + + def serialise_lists(self, context: str, bba: BBAWriter): + pass + def serialise(self, context: str, bba: BBAWriter): + bba.u32(self.pip_xy.index) + bba.u32(self.pip_dst.index) + bba.u32(self.pip_src.index) + bba.u32(self.bel_x) + bba.u32(self.bel_y) + bba.u32(self.bel_z) + bba.u32(self.hclk_side.index) + @dataclass class ChipExtraData(BBAStruct): strs: StringPool @@ -175,6 +199,7 @@ class ChipExtraData(BBAStruct): diff_io_types: list[IdString] = field(default_factory = list) dqce_bels: list[SpineBel] = field(default_factory = list) dcs_bels: list[SpineBel] = field(default_factory = list) + dhcen_bels: list[WireBel] = field(default_factory = list) def create_bottom_io(self): self.bottom_io = BottomIO() @@ -185,6 +210,9 @@ class ChipExtraData(BBAStruct): def add_diff_io_type(self, diff_type: str): self.diff_io_types.append(self.strs.id(diff_type)) + def 
add_dhcen_bel(self, pip_xy: str, pip_dst: str, pip_src, x: int, y: int, z: int, side: str): + self.dhcen_bels.append(WireBel(self.strs.id(pip_xy), self.strs.id(pip_dst), self.strs.id(pip_src), x, y, z, self.strs.id(side))) + def add_dqce_bel(self, spine: str, x: int, y: int, z: int): self.dqce_bels.append(SpineBel(self.strs.id(spine), x, y, z)) @@ -202,6 +230,9 @@ class ChipExtraData(BBAStruct): bba.label(f"{context}_dcs_bels") for i, t in enumerate(self.dcs_bels): t.serialise(f"{context}_dcs_bel{i}", bba) + bba.label(f"{context}_dhcen_bels") + for i, t in enumerate(self.dhcen_bels): + t.serialise(f"{context}_dhcen_bel{i}", bba) def serialise(self, context: str, bba: BBAWriter): bba.u32(self.flags) @@ -209,6 +240,7 @@ class ChipExtraData(BBAStruct): bba.slice(f"{context}_diff_io_types", len(self.diff_io_types)) bba.slice(f"{context}_dqce_bels", len(self.dqce_bels)) bba.slice(f"{context}_dcs_bels", len(self.dcs_bels)) + bba.slice(f"{context}_dhcen_bels", len(self.dhcen_bels)) @dataclass class PadExtraData(BBAStruct): @@ -425,6 +457,9 @@ dqce_bels = {} # map spine -> dcs bel dcs_bels = {} +# map HCLKIN wire -> dhcen bel +dhcen_bels = {} + def create_extra_funcs(tt: TileType, db: chipdb, x: int, y: int): if (y, x) not in db.extra_func: return @@ -453,6 +488,16 @@ def create_extra_funcs(tt: TileType, db: chipdb, x: int, y: int): tt.create_wire(wire) bel = tt.create_bel("BANDGAP", "BANDGAP", z = BANDGAP_Z) tt.add_bel_pin(bel, "BGEN", wire, PinType.INPUT) + elif func == 'dhcen': + for idx, dhcen in enumerate(desc): + wire = dhcen['ce'] + if not tt.has_wire(wire): + tt.create_wire(wire) + bel_z = DHCEN_Z + idx + bel = tt.create_bel(f"DHCEN{idx}", "DHCEN", z = bel_z) + tt.add_bel_pin(bel, "CE", wire, PinType.INPUT) + pip_xy, pip_dst, pip_src, side = dhcen['pip'] + dhcen_bels[pip_xy, pip_dst, pip_src] = (x, y, bel_z, side) elif func == 'dqce': for idx in range(6): bel_z = DQCE_Z + idx @@ -1166,6 +1211,9 @@ def create_extra_data(chip: Chip, db: chipdb, chip_flags: int): 
chip.extra_data.add_bottom_io_cnd(net_a, net_b) for diff_type in db.diff_io_types: chip.extra_data.add_diff_io_type(diff_type) + # create hclk wire->dhcen bel map + for pip, bel in dhcen_bels.items(): + chip.extra_data.add_dhcen_bel(pip[0], pip[1], pip[2], bel[0], bel[1], bel[2], bel[3]) # create spine->dqce bel map for spine, bel in dqce_bels.items(): chip.extra_data.add_dqce_bel(spine, bel[0], bel[1], bel[2]) diff --git a/himbaechel/uarch/gowin/gowin_utils.cc b/himbaechel/uarch/gowin/gowin_utils.cc index 0d6fb8b8..1e1c2208 100644 --- a/himbaechel/uarch/gowin/gowin_utils.cc +++ b/himbaechel/uarch/gowin/gowin_utils.cc @@ -87,6 +87,22 @@ BelId GowinUtils::get_dcs_bel(IdString spine_name) return BelId(); } +BelId GowinUtils::get_dhcen_bel(WireId hclkin_wire, IdString &side) +{ + const Extra_chip_data_POD *extra = reinterpret_cast(ctx->chip_info->extra_data.get()); + for (auto &wire_bel : extra->dhcen_bels) { + IdString dst = IdString(wire_bel.pip_dst); + IdString src = IdString(wire_bel.pip_src); + IdStringList pip = IdStringList::concat(IdStringList::concat(IdString(wire_bel.pip_xy), dst), src); + WireId wire = ctx->getPipDstWire(ctx->getPipByName(pip)); + if (wire == hclkin_wire) { + side = IdString(wire_bel.side); + return ctx->getBelByLocation(Loc(wire_bel.bel_x, wire_bel.bel_y, wire_bel.bel_z)); + } + } + return BelId(); +} + bool GowinUtils::is_simple_io_bel(BelId bel) { return chip_bel_info(ctx->chip_info, bel).flags & BelFlags::FLAG_SIMPLE_IO; diff --git a/himbaechel/uarch/gowin/gowin_utils.h b/himbaechel/uarch/gowin/gowin_utils.h index 77b75082..f9276c10 100644 --- a/himbaechel/uarch/gowin/gowin_utils.h +++ b/himbaechel/uarch/gowin/gowin_utils.h @@ -35,6 +35,7 @@ struct GowinUtils BelId get_io_bel_from_iologic(BelId bel); BelId get_dqce_bel(IdString spine_name); BelId get_dcs_bel(IdString spine_name); + BelId get_dhcen_bel(WireId hclkin_wire, IdString &side); // BSRAM bool has_SP32(void); diff --git a/himbaechel/uarch/gowin/pack.cc 
b/himbaechel/uarch/gowin/pack.cc index b7b64a48..8d03b3a0 100644 --- a/himbaechel/uarch/gowin/pack.cc +++ b/himbaechel/uarch/gowin/pack.cc @@ -3076,6 +3076,36 @@ struct GowinPacker } } + // ========================================= + // Create DHCENs + // ========================================= + void pack_dhcens() + { + // Allocate all available dhcen bels; we will find out which of them + // will actually be used during the routing process. + bool grab_bels = false; + for (auto &cell : ctx->cells) { + auto &ci = *cell.second; + if (ci.type == id_DHCEN) { + ci.pseudo_cell = std::make_unique(Loc(0, 0, 0)); + grab_bels = true; + } + } + if (grab_bels) { + // sane message if new primitives are used with old bases + auto buckets = ctx->getBelBuckets(); + NPNR_ASSERT_MSG(std::find(buckets.begin(), buckets.end(), id_DHCEN) != buckets.end(), + "There are no DHCEN bels to use."); + int i = 0; + for (auto &bel : ctx->getBelsInBucket(ctx->getBelBucketForCellType(id_DHCEN))) { + IdString dhcen_name = ctx->idf("$PACKER_DHCEN_%d", ++i); + CellInfo *dhcen = ctx->createCell(dhcen_name, id_DHCEN); + dhcen->addInput(id_CE); + ctx->bindBel(bel, dhcen, STRENGTH_LOCKED); + } + } + } + // ========================================= // Enable UserFlash // ========================================= @@ -3218,6 +3248,9 @@ struct GowinPacker pack_buffered_nets(); ctx->check(); + pack_dhcens(); + ctx->check(); + pack_userflash(); ctx->check();