diff --git a/3rdparty/fpga-interchange-schema b/3rdparty/fpga-interchange-schema index cb6d1684..8ec5b173 160000 --- a/3rdparty/fpga-interchange-schema +++ b/3rdparty/fpga-interchange-schema @@ -1 +1 @@ -Subproject commit cb6d16847dfca7f104205c83bbdc056303ac82a0 +Subproject commit 8ec5b1739d7b91b84df3e92ccc70c7a7ee644089 diff --git a/CMakeLists.txt b/CMakeLists.txt index 7159efd4..ff26d4ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,10 @@ if(WASI) ) else() set(USE_THREADS ON) + find_package(TBB QUIET) + if (TBB_FOUND) + add_definitions(-DNEXTPNR_USE_TBB) + endif() endif() if (NOT USE_THREADS) @@ -243,6 +247,9 @@ endif() if(PROFILER) list(APPEND EXTRA_LIB_DEPS profiler) endif() +if(TBB_FOUND) + list(APPEND EXTRA_LIB_DEPS tbb) +endif() foreach (family ${ARCH}) message(STATUS "Configuring architecture: ${family}") diff --git a/fpga_interchange/arch.cc b/fpga_interchange/arch.cc index a3a8a166..476801aa 100644 --- a/fpga_interchange/arch.cc +++ b/fpga_interchange/arch.cc @@ -24,9 +24,11 @@ #include #include #include +#include #include #include #include + #include "constraints.impl.h" #include "fpga_interchange.h" #include "log.h" @@ -42,6 +44,11 @@ // Include tcl.h late because it messed with defines and let them leave the // scope of the header. #include + +//#define DEBUG_BINDING +//#define USE_LOOKAHEAD +//#define DEBUG_CELL_PIN_MAPPING + NEXTPNR_NAMESPACE_BEGIN struct SiteBelPair { @@ -83,6 +90,22 @@ void IdString::initialize_arch(const BaseCtx *ctx) {} static const ChipInfoPOD *get_chip_info(const RelPtr *ptr) { return ptr->get(); } +static std::string sha1_hash(const char *data, size_t size) +{ + boost::uuids::detail::sha1 hasher; + hasher.process_bytes(data, size); + + // unsigned int[5] + boost::uuids::detail::sha1::digest_type digest; + hasher.get_digest(digest); + + std::ostringstream buf; + for (int i = 0; i < 5; ++i) + buf << std::hex << std::setfill('0') << std::setw(8) << digest[i]; + + return buf.str(); +} + Arch::Arch(ArchArgs args) : args(args) { try { @@ -90,6 +113,8 @@ Arch::Arch(ArchArgs args) : args(args) if (args.chipdb.empty() || !blob_file.is_open()) log_error("Unable to read chipdb %s\n", args.chipdb.c_str()); const char *blob = reinterpret_cast(blob_file.data()); + + chipdb_hash = sha1_hash(blob, blob_file.size()); chip_info = get_chip_info(reinterpret_cast *>(blob)); } catch (...) { log_error("Unable to read chipdb %s\n", args.chipdb.c_str()); @@ -249,7 +274,13 @@ Arch::Arch(ArchArgs args) : args(args) default_tags.resize(max_tag_count); } -void Arch::init() { dedicated_interconnect.init(getCtx()); } +void Arch::init() +{ +#ifdef USE_LOOKAHEAD + lookahead.init(getCtx(), getCtx()); +#endif + dedicated_interconnect.init(getCtx()); +} // ----------------------------------------------------------------------- @@ -894,18 +925,11 @@ DecalXY Arch::getGroupDecal(GroupId pip) const { return {}; }; delay_t Arch::estimateDelay(WireId src, WireId dst) const { - // FIXME: Implement something to push the A* router in the right direction. 
- int src_x, src_y; - get_tile_x_y(src.tile, &src_x, &src_y); - - int dst_x, dst_y; - get_tile_x_y(dst.tile, &dst_x, &dst_y); - - delay_t base = 30 * std::min(std::abs(dst_x - src_x), 18) + 10 * std::max(std::abs(dst_x - src_x) - 18, 0) + - 60 * std::min(std::abs(dst_y - src_y), 6) + 20 * std::max(std::abs(dst_y - src_y) - 6, 0) + 300; - - base = (base * 3) / 2; - return base; +#ifdef USE_LOOKAHEAD + return lookahead.estimateDelay(getCtx(), src, dst); +#else + return 0; +#endif } delay_t Arch::predictDelay(const NetInfo *net_info, const PortRef &sink) const @@ -1757,6 +1781,8 @@ bool Arch::checkPipAvailForNet(PipId pip, NetInfo *net) const bool Arch::checkPipAvail(PipId pip) const { return checkPipAvailForNet(pip, nullptr); } +std::string Arch::get_chipdb_hash() const { return chipdb_hash; } + // Instance constraint templates. template void Arch::ArchConstraints::bindBel(Arch::ArchConstraints::TagState *, const Arch::ConstraintRange); template void Arch::ArchConstraints::unbindBel(Arch::ArchConstraints::TagState *, const Arch::ConstraintRange); diff --git a/fpga_interchange/arch.h b/fpga_interchange/arch.h index 217e3508..d167f797 100644 --- a/fpga_interchange/arch.h +++ b/fpga_interchange/arch.h @@ -36,6 +36,7 @@ #include "arch_iterators.h" #include "chipdb.h" #include "dedicated_interconnect.h" +#include "lookahead.h" #include "site_router.h" #include "site_routing_cache.h" @@ -45,6 +46,8 @@ struct ArchArgs { std::string chipdb; std::string package; + bool rebuild_lookahead; + bool dont_write_lookahead; }; struct ArchRanges @@ -1038,8 +1041,13 @@ struct Arch : ArchAPI std::regex verilog_hex_constant; void read_lut_equation(DynamicBitarray<> *equation, const Property &equation_parameter) const; bool route_vcc_to_unused_lut_pins(); + + Lookahead lookahead; mutable RouteNodeStorage node_storage; mutable SiteRoutingCache site_routing_cache; + + std::string chipdb_hash; + std::string get_chipdb_hash() const; }; NEXTPNR_NAMESPACE_END diff --git a/fpga_interchange/cost_map.cc b/fpga_interchange/cost_map.cc new file mode 100644 index 00000000..b42f115d --- /dev/null +++ b/fpga_interchange/cost_map.cc @@ -0,0 +1,400 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "cost_map.h" + +#include "context.h" +#include "log.h" + +NEXTPNR_NAMESPACE_BEGIN + +///@brief Factor to adjust the penalty calculation for deltas outside the segment bounding box: +// factor < 1.0: penalty has less impact on the final returned delay +// factor > 1.0: penalty has more impact on the final returned delay +static constexpr float PENALTY_FACTOR = 1.f; + +///@brief Minimum penalty cost that is added when penalizing a delta outside the segment bounding box. 
+static constexpr delay_t PENALTY_MIN = 1; + +// also known as the L1 norm +static int manhattan_distance(const std::pair &a, const std::pair &b) +{ + return std::abs(b.first - a.first) + std::abs(b.second - a.second); +} + +static delay_t penalize(const delay_t &entry, int distance, delay_t penalty) +{ + penalty = std::max(penalty, PENALTY_MIN); + return entry + distance * penalty * PENALTY_FACTOR; +} + +delay_t CostMap::get_delay(const Context *ctx, WireId src_wire, WireId dst_wire) const +{ + TypeWirePair type_pair; + type_pair.src = TypeWireId(ctx, src_wire); + type_pair.dst = TypeWireId(ctx, dst_wire); + + int src_tile; + if (src_wire.tile == -1) { + src_tile = ctx->chip_info->nodes[src_wire.index].tile_wires[0].tile; + } else { + src_tile = src_wire.tile; + } + + int32_t src_x, src_y; + ctx->get_tile_x_y(src_tile, &src_x, &src_y); + + int dst_tile; + if (dst_wire.tile == -1) { + dst_tile = ctx->chip_info->nodes[dst_wire.index].tile_wires[0].tile; + } else { + dst_tile = dst_wire.tile; + } + + int32_t dst_x, dst_y; + ctx->get_tile_x_y(dst_tile, &dst_x, &dst_y); + + auto iter = cost_map_.find(type_pair); + if (iter == cost_map_.end()) { + auto &src_type = ctx->chip_info->tile_types[type_pair.src.type]; + IdString src_tile_type(src_type.name); + IdString src_wire_name(src_type.wire_data[type_pair.src.index].name); + + auto &dst_type = ctx->chip_info->tile_types[type_pair.dst.type]; + IdString dst_tile_type(dst_type.name); + IdString dst_wire_name(dst_type.wire_data[type_pair.dst.index].name); + +#if 0 + log_warning("Delay matrix is missing %s/%s -> %s/%s\n", + src_tile_type.c_str(ctx), + src_wire_name.c_str(ctx), + dst_tile_type.c_str(ctx), + dst_wire_name.c_str(ctx)); +#endif + return std::numeric_limits::max(); + } + + const auto &delay_matrix = iter->second; + + int32_t off_x = delay_matrix.offset.first + (dst_x - src_x); + int32_t off_y = delay_matrix.offset.second + (dst_y - src_y); + + int32_t x_dim = delay_matrix.data.shape()[0]; + int32_t y_dim = delay_matrix.data.shape()[1]; + NPNR_ASSERT(x_dim > 0); + NPNR_ASSERT(y_dim > 0); + + // Bound closest_x/y to [0, dim) + int32_t closest_x = std::min(std::max(off_x, 0), x_dim - 1); + int32_t closest_y = std::min(std::max(off_y, 0), y_dim - 1); + + // Get the cost entry from the cost map at the deltas values + auto cost = delay_matrix.data[closest_x][closest_y]; + NPNR_ASSERT(cost >= 0); + + // Get the base penalty corresponding to the current segment. + auto penalty = delay_matrix.penalty; + + // Get the distance between the closest point in the bounding box and the original coordinates. + // Note that if the original coordinates are within the bounding box, the distance will be equal to zero. + auto distance = manhattan_distance(std::make_pair(off_x, off_y), std::make_pair(closest_x, closest_y)); + + // Return the penalized cost (no penalty is added if the coordinates are within the bounding box). 
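+    // i.e. roughly cost + distance * max(penalty, PENALTY_MIN) * PENALTY_FACTOR, where
+    // distance is the Manhattan distance from the query point to the clamped in-box point,
+    // so estimates degrade linearly the further the query falls outside the recorded box.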
+ return penalize(cost, distance, penalty); +} + +void CostMap::set_cost_map(const Context *ctx, const TypeWirePair &wire_pair, + const HashTables::HashMap, delay_t> &delays) +{ + CostMapEntry delay_matrix; + + auto &offset = delay_matrix.offset; + offset.first = 0; + offset.second = 0; + + int32_t max_x_offset = 0; + int32_t max_y_offset = 0; + + for (const auto &delay_pair : delays) { + auto &dx_dy = delay_pair.first; + offset.first = std::max(-dx_dy.first, offset.first); + offset.second = std::max(-dx_dy.second, offset.second); + max_x_offset = std::max(dx_dy.first, max_x_offset); + max_y_offset = std::max(dx_dy.second, max_y_offset); + } + + int32_t x_dim = offset.first + max_x_offset + 1; + int32_t y_dim = offset.second + max_y_offset + 1; + + delay_matrix.data.resize(boost::extents[x_dim][y_dim]); + + // Fill matrix with sentinel of -1 to know where the holes in the matrix + // are. + std::fill_n(delay_matrix.data.data(), delay_matrix.data.num_elements(), -1); + + for (const auto &delay_pair : delays) { + auto &dx_dy = delay_pair.first; + int32_t off_x = dx_dy.first + offset.first; + int32_t off_y = dx_dy.second + offset.second; + NPNR_ASSERT(off_x >= 0); + NPNR_ASSERT(off_x < x_dim); + NPNR_ASSERT(off_y >= 0); + NPNR_ASSERT(off_y < y_dim); + + delay_matrix.data[off_x][off_y] = delay_pair.second; + } + + delay_matrix.penalty = get_penalty(delay_matrix.data); + fill_holes(ctx, wire_pair, delay_matrix.data, delay_matrix.penalty); + + { + cost_map_mutex_.lock(); + auto result = cost_map_.emplace(wire_pair, delay_matrix); + NPNR_ASSERT(result.second); + cost_map_mutex_.unlock(); + } +} + +static void assign_min_entry(delay_t *dst, const delay_t &src) +{ + if (src >= 0) { + if (*dst < 0) { + *dst = src; + } else if (src < *dst) { + *dst = src; + } + } +} + +std::pair CostMap::get_nearby_cost_entry(const boost::multi_array &matrix, int cx, int cy, + const ArcBounds &bounds) +{ +#ifdef DEBUG_FILL + log_info("Filling %d, %d within (%d, %d, %d, %d)\n", cx, cy, bounds.x0, bounds.y0, bounds.x1, bounds.y1); +#endif + + // spiral around (cx, cy) looking for a nearby entry + bool in_bounds = bounds.contains(cx, cy); + if (!in_bounds) { +#ifdef DEBUG_FILL + log_info("Already out of bounds, return!\n"); +#endif + return std::make_pair(-1, 0); + } + int n = 0; + delay_t fill(matrix[cx][cy]); + + while (in_bounds && (fill < 0)) { + n++; +#ifdef DEBUG_FILL + log_info("At n = %d\n", n); +#endif + in_bounds = false; + delay_t min_entry = -1; + for (int ox = -n; ox <= n; ox++) { + int x = cx + ox; + int oy = n - abs(ox); + int yp = cy + oy; + int yn = cy - oy; +#ifdef DEBUG_FILL + log_info("Testing %d, %d\n", x, yp); +#endif + if (bounds.contains(x, yp)) { + assign_min_entry(&min_entry, matrix[x][yp]); + in_bounds = true; +#ifdef DEBUG_FILL + log_info("matrix[%d, %d] = %d, min_entry = %d\n", x, yp, matrix[x][yp], min_entry); +#endif + } +#ifdef DEBUG_FILL + log_info("Testing %d, %d\n", x, yn); +#endif + if (bounds.contains(x, yn)) { + assign_min_entry(&min_entry, matrix[x][yn]); + in_bounds = true; +#ifdef DEBUG_FILL + log_info("matrix[%d, %d] = %d, min_entry = %d\n", x, yn, matrix[x][yn], min_entry); +#endif + } + } + + if (fill < 0 && min_entry >= 0) { + fill = min_entry; + } + } + + return std::make_pair(fill, n); +} + +void CostMap::fill_holes(const Context *ctx, const TypeWirePair &type_pair, boost::multi_array &matrix, + delay_t delay_penalty) +{ + // find missing cost entries and fill them in by copying a nearby cost entry + std::vector> missing; + bool couldnt_fill = false; + auto shifted_bounds = 
ArcBounds(0, 0, matrix.shape()[0] - 1, matrix.shape()[1] - 1); + int max_fill = 0; + for (unsigned ix = 0; ix < matrix.shape()[0]; ix++) { + for (unsigned iy = 0; iy < matrix.shape()[1]; iy++) { + delay_t &cost_entry = matrix[ix][iy]; + if (cost_entry < 0) { + // maximum search radius + delay_t filler; + int distance; + std::tie(filler, distance) = get_nearby_cost_entry(matrix, ix, iy, shifted_bounds); + if (filler >= 0) { + missing.push_back(std::make_tuple(ix, iy, penalize(filler, distance, delay_penalty))); + max_fill = std::max(max_fill, distance); + } else { + couldnt_fill = true; + } + } + } + if (couldnt_fill) { + // give up trying to fill an empty matrix + break; + } + } + + if (!couldnt_fill && max_fill > 0) { + if (ctx->verbose) { + auto &src_type_data = ctx->chip_info->tile_types[type_pair.src.type]; + IdString src_type(src_type_data.name); + IdString src_wire(src_type_data.wire_data[type_pair.src.index].name); + + auto &dst_type_data = ctx->chip_info->tile_types[type_pair.dst.type]; + IdString dst_type(dst_type_data.name); + IdString dst_wire(dst_type_data.wire_data[type_pair.dst.index].name); + +#ifdef DEBUG_FILL + log_info("At %s/%s -> %s/%s: max_fill = %d, delay_penalty = %d\n", src_type.c_str(ctx), src_wire.c_str(ctx), + dst_type.c_str(ctx), dst_wire.c_str(ctx), max_fill, delay_penalty); +#endif + } + } + + // write back the missing entries + for (auto &xy_entry : missing) { + matrix[std::get<0>(xy_entry)][std::get<1>(xy_entry)] = std::get<2>(xy_entry); + } + + if (couldnt_fill) { + auto &src_type_data = ctx->chip_info->tile_types[type_pair.src.type]; + IdString src_type(src_type_data.name); + IdString src_wire(src_type_data.wire_data[type_pair.src.index].name); + + auto &dst_type_data = ctx->chip_info->tile_types[type_pair.dst.type]; + IdString dst_type(dst_type_data.name); + IdString dst_wire(dst_type_data.wire_data[type_pair.dst.index].name); + + log_warning("Couldn't fill holes in the cost matrix %s/%s -> %s/%s %d x %d bounding box\n", src_type.c_str(ctx), + src_wire.c_str(ctx), dst_type.c_str(ctx), dst_wire.c_str(ctx), shifted_bounds.x1, + shifted_bounds.y1); + for (unsigned y = 0; y < matrix.shape()[1]; y++) { + for (unsigned x = 0; x < matrix.shape()[0]; x++) { + NPNR_ASSERT(matrix[x][y] >= 0); + } + } + } +} + +delay_t CostMap::get_penalty(const boost::multi_array &matrix) const +{ + delay_t min_delay = std::numeric_limits::max(); + delay_t max_delay = std::numeric_limits::min(); + + std::pair min_location(0, 0), max_location(0, 0); + for (unsigned ix = 0; ix < matrix.shape()[0]; ix++) { + for (unsigned iy = 0; iy < matrix.shape()[1]; iy++) { + const delay_t &cost_entry = matrix[ix][iy]; + if (cost_entry >= 0) { + if (cost_entry < min_delay) { + min_delay = cost_entry; + min_location = std::make_pair(ix, iy); + } + if (cost_entry > max_delay) { + max_delay = cost_entry; + max_location = std::make_pair(ix, iy); + } + } + } + } + + delay_t delay_penalty = + (max_delay - min_delay) / static_cast(std::max(1, manhattan_distance(max_location, min_location))); + + return delay_penalty; +} + +void CostMap::from_reader(lookahead_storage::CostMap::Reader reader) +{ + for (auto cost_entry : reader.getCostMap()) { + TypeWirePair key(cost_entry.getKey()); + + auto result = cost_map_.emplace(key, CostMapEntry()); + NPNR_ASSERT(result.second); + + CostMapEntry &entry = result.first->second; + auto data = cost_entry.getData(); + auto in_iter = data.begin(); + + entry.data.resize(boost::extents[cost_entry.getXDim()][cost_entry.getYDim()]); + if (entry.data.num_elements() != 
data.size()) { + log_error("entry.data.num_elements() %zu != data.size() %u", entry.data.num_elements(), data.size()); + } + + delay_t *out = entry.data.origin(); + for (; in_iter != data.end(); ++in_iter, ++out) { + *out = *in_iter; + } + + entry.penalty = cost_entry.getPenalty(); + + entry.offset.first = cost_entry.getXOffset(); + entry.offset.second = cost_entry.getYOffset(); + } +} + +void CostMap::to_builder(lookahead_storage::CostMap::Builder builder) const +{ + auto cost_map = builder.initCostMap(cost_map_.size()); + auto entry_iter = cost_map.begin(); + auto in = cost_map_.begin(); + for (; entry_iter != cost_map.end(); ++entry_iter, ++in) { + NPNR_ASSERT(in != cost_map_.end()); + + in->first.to_builder(entry_iter->getKey()); + const CostMapEntry &entry = in->second; + + auto data = entry_iter->initData(entry.data.num_elements()); + const delay_t *data_in = entry.data.origin(); + for (size_t i = 0; i < entry.data.num_elements(); ++i) { + data.set(i, data_in[i]); + } + + entry_iter->setXDim(entry.data.shape()[0]); + entry_iter->setYDim(entry.data.shape()[1]); + entry_iter->setXOffset(entry.offset.first); + entry_iter->setYOffset(entry.offset.second); + entry_iter->setPenalty(entry.penalty); + } +} + +NEXTPNR_NAMESPACE_END diff --git a/fpga_interchange/cost_map.h b/fpga_interchange/cost_map.h new file mode 100644 index 00000000..e57a1027 --- /dev/null +++ b/fpga_interchange/cost_map.h @@ -0,0 +1,68 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + */ + +#ifndef COST_MAP_H +#define COST_MAP_H + +#include +#include + +#include "hash_table.h" +#include "lookahead.capnp.h" +#include "nextpnr_namespaces.h" +#include "nextpnr_types.h" +#include "type_wire.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct Context; + +class CostMap +{ + public: + delay_t get_delay(const Context *ctx, WireId src, WireId dst) const; + void set_cost_map(const Context *ctx, const TypeWirePair &wire_pair, + const HashTables::HashMap, delay_t> &delays); + + void from_reader(lookahead_storage::CostMap::Reader reader); + void to_builder(lookahead_storage::CostMap::Builder builder) const; + + private: + struct CostMapEntry + { + boost::multi_array data; + std::pair offset; + delay_t penalty; + }; + + std::mutex cost_map_mutex_; + HashTables::HashMap cost_map_; + + void fill_holes(const Context *ctx, const TypeWirePair &wire_pair, boost::multi_array &matrix, + delay_t delay_penality); + + std::pair get_nearby_cost_entry(const boost::multi_array &matrix, int cx, int cy, + const ArcBounds &bounds); + delay_t get_penalty(const boost::multi_array &matrix) const; +}; + +NEXTPNR_NAMESPACE_END + +#endif /* COST_MAP_H */ diff --git a/fpga_interchange/family.cmake b/fpga_interchange/family.cmake index c288736c..2b78b75c 100644 --- a/fpga_interchange/family.cmake +++ b/fpga_interchange/family.cmake @@ -24,14 +24,31 @@ add_custom_target(all-${family}-archcheck-tests) add_subdirectory(${family}/examples/devices) add_subdirectory(${family}/examples/tests) +set(PROTOS lookahead.capnp) +set(CAPNP_SRCS) +set(CAPNP_HDRS) +find_package(CapnProto REQUIRED) +foreach (proto ${PROTOS}) + capnp_generate_cpp(CAPNP_SRC CAPNP_HDR fpga_interchange/${proto}) + list(APPEND CAPNP_HDRS ${CAPNP_HDR}) + list(APPEND CAPNP_SRCS ${CAPNP_SRC}) +endforeach() + +add_library(extra_capnp STATIC ${CAPNP_SRCS}) +target_link_libraries(extra_capnp PRIVATE CapnProto::capnp) + +target_include_directories(extra_capnp INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/fpga_interchange) + foreach (target ${family_targets}) target_include_directories(${target} PRIVATE ${TCL_INCLUDE_PATH}) target_link_libraries(${target} PRIVATE ${TCL_LIBRARY}) target_link_libraries(${target} PRIVATE fpga_interchange_capnp) + target_link_libraries(${target} PRIVATE extra_capnp) target_link_libraries(${target} PRIVATE z) endforeach() if(BUILD_GUI) target_link_libraries(gui_${family} fpga_interchange_capnp) + target_link_libraries(gui_${family} extra_capnp) target_link_libraries(gui_${family} z) endif() diff --git a/fpga_interchange/lookahead.capnp b/fpga_interchange/lookahead.capnp new file mode 100644 index 00000000..e69c165d --- /dev/null +++ b/fpga_interchange/lookahead.capnp @@ -0,0 +1,61 @@ +@0x97c69817483d9dea; + +using Cxx = import "/capnp/c++.capnp"; +$Cxx.namespace("lookahead_storage"); + +using DelayType = Int32; + +struct TypeWireId { + type @0: Int32; + index @1: Int32; +} + +struct TypeWirePair { + src @0 : TypeWireId; + dst @1 : TypeWireId; +} + +struct InputSiteWireCost { + routeTo @0 : TypeWireId; + cost @1 : DelayType; +} + +struct InputSiteWireCostMap { + key @0 : TypeWireId; + value @1 : List(InputSiteWireCost); +} + +struct OutputSiteWireCostMap { + key @0 : TypeWireId; + cheapestRouteFrom @1 : TypeWireId; + cost @2 : DelayType; +} + +struct SiteToSiteCostMap { + key @0 : TypeWirePair; + cost @1 : DelayType; +} + +struct CostMapEntry { + key @0 : TypeWirePair; + data @1 : List(DelayType); + xDim @2 : UInt32; + yDim @3 : UInt32; + xOffset @4 : UInt32; + yOffset @5 : UInt32; + penalty @6 : DelayType; +} + +struct CostMap { + costMap 
@0 : List(CostMapEntry); +} + +struct Lookahead { + + chipdbHash @0 : Text; + inputSiteWires @1 : List(InputSiteWireCostMap); + outputSiteWires @2 : List(OutputSiteWireCostMap); + siteToSiteCost @3 : List(SiteToSiteCostMap); + costMap @4 : CostMap; +} + diff --git a/fpga_interchange/lookahead.cc b/fpga_interchange/lookahead.cc new file mode 100644 index 00000000..cd05c16f --- /dev/null +++ b/fpga_interchange/lookahead.cc @@ -0,0 +1,1548 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "lookahead.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "context.h" +#include "flat_wire_map.h" +#include "log.h" +#include "sampler.h" +#include "scope_lock.h" + +#if defined(NEXTPNR_USE_TBB) +#include +#endif + +NEXTPNR_NAMESPACE_BEGIN + +static constexpr size_t kNumberSamples = 4; +static constexpr int32_t kMaxExploreDist = 20; + +// Initial only explore with a depth of this. +static constexpr int32_t kInitialExploreDepth = 30; + +struct RoutingNode +{ + WireId wire_to_expand; + delay_t cost; + int32_t depth; + + bool operator<(const RoutingNode &other) const { return cost < other.cost; } +}; + +struct PipAndCost +{ + PipId upstream_pip; + delay_t cost_from_src; + int32_t depth; +}; + +static void expand_input(const Context *ctx, WireId input_wire, HashTables::HashMap *input_costs) +{ + HashTables::HashSet seen; + std::priority_queue to_expand; + + RoutingNode initial; + initial.cost = 0; + initial.wire_to_expand = input_wire; + + to_expand.push(initial); + + while (!to_expand.empty()) { + RoutingNode node = to_expand.top(); + to_expand.pop(); + + auto result = seen.emplace(node.wire_to_expand); + if (!result.second) { + // We've already done an expansion at this wire. + continue; + } + + for (PipId pip : ctx->getPipsUphill(node.wire_to_expand)) { + if (ctx->is_pip_synthetic(pip)) { + continue; + } + WireId new_wire = ctx->getPipSrcWire(pip); + if (new_wire == WireId()) { + continue; + } + + RoutingNode next_node; + next_node.wire_to_expand = new_wire; + next_node.cost = node.cost + ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(new_wire).maxDelay(); + + if (ctx->is_site_port(pip)) { + // Done with expansion, record the path if cheaper. + // Only the first path to each wire will be the cheapest. + + // Get local tile wire at pip dest. Using getPipSrcWire may + // return a node wire, which is not correct here. 
+ TypeWireId route_to; + route_to.type = ctx->chip_info->tiles[pip.tile].type; + route_to.index = loc_info(ctx->chip_info, pip).pip_data[pip.index].src_index; + if (route_to.index >= 0) { + auto result = input_costs->emplace(route_to, next_node.cost); + if (!result.second && result.first->second > next_node.cost) { + result.first->second = next_node.cost; + } + } + } else { + to_expand.push(next_node); + } + } + } +} + +static void update_site_to_site_costs(const Context *ctx, WireId first_wire, + const HashTables::HashMap &best_path, + HashTables::HashMap *site_to_site_cost) +{ + for (auto &best_pair : best_path) { + WireId last_wire = best_pair.first; + TypeWirePair pair; + pair.dst = TypeWireId(ctx, last_wire); + + PipAndCost pip_and_cost = best_pair.second; + delay_t cost_from_src = pip_and_cost.cost_from_src; + + WireId cursor; + do { + cursor = ctx->getPipSrcWire(pip_and_cost.upstream_pip); + + pair.src = TypeWireId(ctx, cursor); + + delay_t cost = cost_from_src; + + // Only use the delta cost from cursor to last_wire, not the full + // cost from first_wire to last_wire. + if (cursor != first_wire) { + pip_and_cost = best_path.at(cursor); + cost -= pip_and_cost.cost_from_src; + } + + NPNR_ASSERT(cost >= 0); + + auto cost_result = site_to_site_cost->emplace(pair, cost); + if (!cost_result.second) { + // Update point to point cost if cheaper. + if (cost_result.first->second > cost) { + cost_result.first->second = cost; + } + } + } while (cursor != first_wire); + } +} + +static void expand_output(const Context *ctx, WireId output_wire, Lookahead::OutputSiteWireCost *output_cost, + HashTables::HashMap *site_to_site_cost) +{ + HashTables::HashSet seen; + std::priority_queue to_expand; + + RoutingNode initial; + initial.cost = 0; + initial.wire_to_expand = output_wire; + + to_expand.push(initial); + + HashTables::HashMap best_path; + + while (!to_expand.empty()) { + RoutingNode node = to_expand.top(); + to_expand.pop(); + + auto result = seen.emplace(node.wire_to_expand); + if (!result.second) { + // We've already done an expansion at this wire. + continue; + } + + for (PipId pip : ctx->getPipsDownhill(node.wire_to_expand)) { + if (ctx->is_pip_synthetic(pip)) { + continue; + } + WireId new_wire = ctx->getPipDstWire(pip); + if (new_wire == WireId()) { + continue; + } + + RoutingNode next_node; + next_node.wire_to_expand = new_wire; + next_node.cost = node.cost + ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(new_wire).maxDelay(); + + if (ctx->is_site_port(pip)) { + // Done with expansion, record the path if cheaper. + + // Get local tile wire at pip dest. Using getPipDstWire may + // return a node wire, which is not correct here. 
+ TypeWireId route_from; + route_from.type = ctx->chip_info->tiles[pip.tile].type; + route_from.index = loc_info(ctx->chip_info, pip).pip_data[pip.index].dst_index; + if (route_from.index != -1 && output_cost != nullptr && next_node.cost < output_cost->cost) { + output_cost->cost = next_node.cost; + output_cost->cheapest_route_from = route_from; + } + } else { + to_expand.push(next_node); + + auto result = best_path.emplace(new_wire, PipAndCost()); + PipAndCost &pip_and_cost = result.first->second; + if (result.second || pip_and_cost.cost_from_src > next_node.cost) { + pip_and_cost.upstream_pip = pip; + pip_and_cost.cost_from_src = next_node.cost; + } + } + } + } + + update_site_to_site_costs(ctx, output_wire, best_path, site_to_site_cost); +} + +static void expand_input_type(const Context *ctx, DeterministicRNG *rng, const Sampler &tiles_of_type, + TypeWireId input_wire, std::vector *input_costs) +{ + HashTables::HashMap input_costs_map; + for (size_t region = 0; region < tiles_of_type.number_of_regions(); ++region) { + size_t tile = tiles_of_type.get_sample_from_region(region, [rng]() -> int32_t { return rng->rng(); }); + + NPNR_ASSERT(ctx->chip_info->tiles[tile].type == input_wire.type); + WireId wire = canonical_wire(ctx->chip_info, tile, input_wire.index); + + expand_input(ctx, wire, &input_costs_map); + } + + input_costs->clear(); + input_costs->reserve(input_costs_map.size()); + for (const auto &input_pair : input_costs_map) { + input_costs->emplace_back(); + auto &input_cost = input_costs->back(); + input_cost.route_to = input_pair.first; + input_cost.cost = input_pair.second; + } +} + +struct DelayStorage +{ + HashTables::HashMap, delay_t>> storage; + int32_t max_explore_depth; +}; + +static bool has_multiple_inputs(const Context *ctx, WireId wire) +{ + size_t pip_count = 0; + for (PipId pip : ctx->getPipsUphill(wire)) { + (void)pip; + pip_count += 1; + } + + return pip_count > 1; +} + +static void update_results(const Context *ctx, const FlatWireMap &best_path, WireId src_wire, + WireId sink_wire, DelayStorage *storage) +{ + TypeWireId src_wire_type(ctx, src_wire); + + int src_tile; + if (src_wire.tile == -1) { + src_tile = ctx->chip_info->nodes[src_wire.index].tile_wires[0].tile; + } else { + src_tile = src_wire.tile; + } + + int32_t src_x, src_y; + ctx->get_tile_x_y(src_tile, &src_x, &src_y); + + TypeWirePair wire_pair; + wire_pair.src = src_wire_type; + + // The first couple wires from the site pip are usually boring, don't record + // them. + bool out_of_infeed = false; + + // Starting from end of result, walk backwards and record the path into + // the delay storage. + WireId cursor = sink_wire; + HashTables::HashSet seen; + while (cursor != src_wire) { + // No loops allowed in routing! 
+ auto result = seen.emplace(cursor); + NPNR_ASSERT(result.second); + + if (!out_of_infeed && has_multiple_inputs(ctx, cursor)) { + out_of_infeed = true; + } + + TypeWireId dst_wire_type(ctx, cursor); + wire_pair.dst = dst_wire_type; + + int dst_tile; + if (cursor.tile == -1) { + dst_tile = ctx->chip_info->nodes[cursor.index].tile_wires[0].tile; + } else { + dst_tile = cursor.tile; + } + int32_t dst_x; + int32_t dst_y; + ctx->get_tile_x_y(dst_tile, &dst_x, &dst_y); + + std::pair dx_dy; + dx_dy.first = dst_x - src_x; + dx_dy.second = dst_y - src_y; + + const PipAndCost &pip_and_cost = best_path.at(cursor); + if (out_of_infeed) { + auto &delta_data = storage->storage[wire_pair]; + auto result2 = delta_data.emplace(dx_dy, pip_and_cost.cost_from_src); + if (!result2.second) { + if (result2.first->second > pip_and_cost.cost_from_src) { + result2.first->second = pip_and_cost.cost_from_src; + } + } + } + + cursor = ctx->getPipSrcWire(pip_and_cost.upstream_pip); + } +} + +static void expand_routing_graph_from_wire(const Context *ctx, WireId first_wire, FlatWireMap *best_path, + DelayStorage *storage) +{ + HashTables::HashSet seen; + std::priority_queue to_expand; + + int src_tile; + if (first_wire.tile == -1) { + src_tile = ctx->chip_info->nodes[first_wire.index].tile_wires[0].tile; + } else { + src_tile = first_wire.tile; + } + + int32_t src_x, src_y; + ctx->get_tile_x_y(src_tile, &src_x, &src_y); + + RoutingNode initial; + initial.cost = 0; + initial.wire_to_expand = first_wire; + initial.depth = 0; + + to_expand.push(initial); + + best_path->clear(); + size_t skip_count = 0; + + while (!to_expand.empty()) { + RoutingNode node = to_expand.top(); + to_expand.pop(); + + auto result = seen.emplace(node.wire_to_expand); + if (!result.second) { + // We've already done an expansion at this wire. + skip_count += 1; + continue; + } + + bool has_site_pip = false; + for (PipId pip : ctx->getPipsDownhill(node.wire_to_expand)) { + if (ctx->is_pip_synthetic(pip)) { + continue; + } + + // Don't expand edges that are site pips, but do record how we + // got to the pip before the site pip! + if (ctx->is_site_port(pip)) { + has_site_pip = true; + continue; + } + + WireId new_wire = ctx->getPipDstWire(pip); + if (new_wire == WireId()) { + continue; + } + + RoutingNode next_node; + next_node.wire_to_expand = new_wire; + next_node.cost = node.cost + ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(new_wire).maxDelay(); + next_node.depth = node.depth + 1; + + // Record best path. 
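+            // best_path is keyed by the pip's destination wire, so update_results can walk the
+            // cheapest chain of upstream pips back from any sink that reaches a site pip.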
+ PipAndCost pip_and_cost; + pip_and_cost.upstream_pip = pip; + pip_and_cost.cost_from_src = next_node.cost; + pip_and_cost.depth = next_node.depth; + auto result = best_path->emplace(new_wire, pip_and_cost); + bool is_best_path = true; + if (!result.second) { + if (result.first.second->cost_from_src > next_node.cost) { + result.first.second->cost_from_src = next_node.cost; + result.first.second->upstream_pip = pip; + result.first.second->depth = next_node.depth; + } else { + is_best_path = false; + } + } + + Loc dst = ctx->getPipLocation(pip); + int32_t dst_x = dst.x; + int32_t dst_y = dst.y; + if (is_best_path && std::abs(dst_x - src_x) < kMaxExploreDist && + std::abs(dst_y - src_y) < kMaxExploreDist && next_node.depth < storage->max_explore_depth) { + to_expand.push(next_node); + } + } + + if (has_site_pip) { + update_results(ctx, *best_path, first_wire, node.wire_to_expand, storage); + } + } +} + +static bool has_multiple_outputs(const Context *ctx, WireId wire) +{ + size_t pip_count = 0; + for (PipId pip : ctx->getPipsDownhill(wire)) { + (void)pip; + pip_count += 1; + } + + return pip_count > 1; +} + +static void expand_routing_graph(const Context *ctx, DeterministicRNG *rng, const Sampler &tiles_of_type, + TypeWireId wire_type, HashTables::HashSet *types_explored, + DelayStorage *storage, HashTables::HashSet *types_deferred, + FlatWireMap *best_path) +{ + HashTables::HashSet new_types_explored; + + for (size_t region = 0; region < tiles_of_type.number_of_regions(); ++region) { + size_t tile = tiles_of_type.get_sample_from_region(region, [rng]() -> int32_t { return rng->rng(); }); + + NPNR_ASSERT(ctx->chip_info->tiles[tile].type == wire_type.type); + + WireId wire = canonical_wire(ctx->chip_info, tile, wire_type.index); + TypeWireSet wire_set(ctx, wire); + + if (!has_multiple_outputs(ctx, wire)) { + types_deferred->emplace(wire_type); + continue; + } + + new_types_explored.emplace(wire_set); + + expand_routing_graph_from_wire(ctx, wire, best_path, storage); + } + + for (const TypeWireSet &new_wire_set : new_types_explored) { + types_explored->emplace(new_wire_set); + } +} + +static WireId follow_pip_chain(const Context *ctx, WireId wire, delay_t *delay_out) +{ + delay_t delay = 0; + WireId cursor = wire; + while (true) { + WireId next; + size_t pip_count = 0; + delay_t next_delay = delay; + for (PipId pip : ctx->getPipsDownhill(cursor)) { + pip_count += 1; + next = ctx->getPipDstWire(pip); + next_delay += ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(next).maxDelay(); + } + + if (pip_count > 1) { + *delay_out = delay; + return cursor; + } + + if (next == WireId()) { + return WireId(); + } + + delay = next_delay; + + cursor = next; + } + + // Unreachable? 
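+    // The loop above either returns a wire with more than one downhill pip or returns WireId()
+    // at a dead end, so this assert should be unreachable.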
+ NPNR_ASSERT(false); +} + +static WireId follow_pip_chain_target(const Context *ctx, WireId wire, WireId target, delay_t *delay_out) +{ + delay_t delay = 0; + WireId cursor = wire; + while (cursor != target) { + WireId next; + size_t pip_count = 0; + delay_t next_delay = delay; + for (PipId pip : ctx->getPipsDownhill(cursor)) { + pip_count += 1; + next = ctx->getPipDstWire(pip); + next_delay += ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(next).maxDelay(); + } + + if (pip_count > 1) { + *delay_out = delay; + return cursor; + } + + if (next == WireId()) { + return WireId(); + } + + delay = next_delay; + + cursor = next; + } + + *delay_out = delay; + return cursor; +} + +static WireId follow_pip_chain_up(const Context *ctx, WireId wire, delay_t *delay_out) +{ + delay_t delay = 0; + WireId cursor = wire; + while (true) { + WireId next; + size_t pip_count = 0; + delay_t next_delay = delay; + for (PipId pip : ctx->getPipsUphill(cursor)) { + pip_count += 1; + next = ctx->getPipSrcWire(pip); + next_delay += ctx->getPipDelay(pip).maxDelay() + ctx->getWireDelay(next).maxDelay(); + } + + if (pip_count > 1) { + *delay_out = delay; + return cursor; + } + + if (next == WireId()) { + return WireId(); + } + + delay = next_delay; + + cursor = next; + } + + // Unreachable? + NPNR_ASSERT(false); +} + +static void expand_deferred_routing_graph(const Context *ctx, DeterministicRNG *rng, const Sampler &tiles_of_type, + TypeWireId wire_type, HashTables::HashSet *types_explored, + DelayStorage *storage, FlatWireMap *best_path) +{ + HashTables::HashSet new_types_explored; + + for (size_t region = 0; region < tiles_of_type.number_of_regions(); ++region) { + size_t tile = tiles_of_type.get_sample_from_region(region, [rng]() -> int32_t { return rng->rng(); }); + + NPNR_ASSERT(ctx->chip_info->tiles[tile].type == wire_type.type); + + WireId wire = canonical_wire(ctx->chip_info, tile, wire_type.index); + TypeWireSet wire_set(ctx, wire); + if (types_explored->count(wire_set)) { + // Check if this wire set has been expanded. + continue; + } + + delay_t delay; + WireId routing_wire = follow_pip_chain(ctx, wire, &delay); + + // This wire doesn't go anywhere! 
+ if (routing_wire == WireId()) { + continue; + } + + TypeWireSet routing_wire_set(ctx, routing_wire); + if (types_explored->count(routing_wire_set)) { + continue; + } + + new_types_explored.emplace(wire_set); + expand_routing_graph_from_wire(ctx, wire, best_path, storage); + } + + for (const TypeWireSet &new_wire_set : new_types_explored) { + types_explored->emplace(new_wire_set); + } +} + +static void expand_output_type(const Context *ctx, DeterministicRNG *rng, const Sampler &tiles_of_type, + TypeWireId output_wire, Lookahead::OutputSiteWireCost *output_cost, + HashTables::HashMap *site_to_site_cost) +{ + for (size_t region = 0; region < tiles_of_type.number_of_regions(); ++region) { + size_t tile = tiles_of_type.get_sample_from_region(region, [rng]() -> int32_t { return rng->rng(); }); + + NPNR_ASSERT(ctx->chip_info->tiles[tile].type == output_wire.type); + WireId wire = canonical_wire(ctx->chip_info, tile, output_wire.index); + + expand_output(ctx, wire, output_cost, site_to_site_cost); + } +} + +static constexpr bool kWriteLookaheadCsv = false; + +void write_lookahead_csv(const Context *ctx, const DelayStorage &all_tiles_storage) +{ + FILE *lookahead_data = fopen("lookahead.csv", "w"); + NPNR_ASSERT(lookahead_data != nullptr); + fprintf(lookahead_data, "src_type,src_wire,dest_type,dest_wire,delta_x,delta_y,delay\n"); + for (const auto &type_pair : all_tiles_storage.storage) { + auto &src_wire_type = type_pair.first.src; + auto &src_type_data = ctx->chip_info->tile_types[src_wire_type.type]; + IdString src_type(src_type_data.name); + IdString src_wire(src_type_data.wire_data[src_wire_type.index].name); + + auto &dst_wire_type = type_pair.first.dst; + auto &dst_type_data = ctx->chip_info->tile_types[dst_wire_type.type]; + IdString dst_type(dst_type_data.name); + IdString dst_wire(dst_type_data.wire_data[dst_wire_type.index].name); + + for (const auto &delta_pair : type_pair.second) { + fprintf(lookahead_data, "%s,%s,%s,%s,%d,%d,%d\n", src_type.c_str(ctx), src_wire.c_str(ctx), + dst_type.c_str(ctx), dst_wire.c_str(ctx), delta_pair.first.first, delta_pair.first.second, + delta_pair.second); + } + } + + fclose(lookahead_data); +} + +// Storage for tile type expansion for lookahead. +struct ExpandLocals +{ + virtual ~ExpandLocals() {} + const std::vector *tiles_of_type; + DeterministicRNG *rng; + FlatWireMap *best_path; + DelayStorage *storage; + HashTables::HashSet *explored; + HashTables::HashSet *deferred; + + virtual void lock() {} + virtual void unlock() {} + virtual void copy_back(int32_t tile_type) {} +}; + +// Do tile type expansion for 1 tile. 
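+// A handful of tiles of this type are drawn from the per-type Sampler, and every non-site wire
+// in the tile type is expanded through the general routing graph (see expand_routing_graph).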
+static void expand_tile_type(const Context *ctx, int32_t tile_type, ExpandLocals *locals) +{ + auto &type_data = ctx->chip_info->tile_types[tile_type]; + if (ctx->verbose) { + ScopeLock lock(locals); + log_info("Expanding all wires in type %s\n", IdString(type_data.name).c_str(ctx)); + } + + auto &tile_sampler = (*locals->tiles_of_type)[tile_type]; + for (size_t wire_index = 0; wire_index < type_data.wire_data.size(); ++wire_index) { + auto &wire_data = type_data.wire_data[wire_index]; + if (wire_data.site != -1) { + // Skip site wires + continue; + } + + if (ctx->debug) { + ScopeLock lock(locals); + log_info("Expanding wire %s in type %s (%d/%zu, seen %zu types, deferred %zu types)\n", + IdString(wire_data.name).c_str(ctx), IdString(type_data.name).c_str(ctx), tile_type, + ctx->chip_info->tile_types.size(), locals->explored->size(), locals->deferred->size()); + } + + TypeWireId wire; + wire.type = tile_type; + wire.index = wire_index; + + expand_routing_graph(ctx, locals->rng, tile_sampler, wire, locals->explored, locals->storage, locals->deferred, + locals->best_path); + } + + locals->copy_back(tile_type); +} + +// Function that does all tile expansions serially. +static void expand_tile_type_serial(const Context *ctx, const std::vector &tile_types, + const std::vector &tiles_of_type, DeterministicRNG *rng, + FlatWireMap *best_path, DelayStorage *storage, + HashTables::HashSet *explored, + HashTables::HashSet *deferred, HashTables::HashSet *tiles_left) +{ + + for (int32_t tile_type : tile_types) { + ExpandLocals locals; + + locals.tiles_of_type = &tiles_of_type; + locals.rng = rng; + locals.best_path = best_path; + locals.storage = storage; + locals.explored = explored; + expand_tile_type(ctx, tile_type, &locals); + + NPNR_ASSERT(tiles_left->erase(tile_type) == 1); + } + + NPNR_ASSERT(tiles_left->empty()); +} + +// Additional storage for doing tile type expansion in parallel. +struct TbbExpandLocals : public ExpandLocals +{ + const Context *ctx; + std::mutex *all_costs_mutex; + + DelayStorage *all_tiles_storage; + HashTables::HashSet *types_explored; + HashTables::HashSet *types_deferred; + HashTables::HashSet *tiles_left; + + void lock() override { all_costs_mutex->lock(); } + + void unlock() override { all_costs_mutex->unlock(); } + + void copy_back(int32_t tile_type) override + { + ScopeLock locker(this); + + auto &type_data = ctx->chip_info->tile_types[tile_type]; + + // Copy per tile data by to over all data structures. + if (ctx->verbose) { + log_info("Expanded all wires in type %s, merging data back\n", IdString(type_data.name).c_str(ctx)); + log_info("Testing %zu wires, saw %zu types, deferred %zu types\n", type_data.wire_data.size(), + explored->size(), deferred->size()); + } + + // Copy cheapest explored paths back to all_tiles_storage. + for (const auto &type_pair : storage->storage) { + auto &type_pair_data = all_tiles_storage->storage[type_pair.first]; + for (const auto &delta_pair : type_pair.second) { + // See if this dx/dy already has data. + auto result = type_pair_data.emplace(delta_pair.first, delta_pair.second); + if (!result.second) { + // This was already in the map, check if this new result is + // better + if (delta_pair.second < result.first->second) { + result.first->second = delta_pair.second; + } + } + } + } + + // Update explored and deferred sets. 
+ for (auto &key : *explored) { + types_explored->emplace(key); + } + for (auto &key : *deferred) { + types_deferred->emplace(key); + } + + NPNR_ASSERT(tiles_left->erase(tile_type)); + + if (ctx->verbose) { + log_info("Done merging data from type %s, %zu tiles left\n", IdString(type_data.name).c_str(ctx), + tiles_left->size()); + } + } +}; + +// Wrapper method used if running expansion in parallel. +// +// expand_tile_type is invoked using thread local data, and then afterwards +// the data is joined with the global data. +static void expand_tile_type_parallel(const Context *ctx, int32_t tile_type, const std::vector &tiles_of_type, + DeterministicRNG *rng, std::mutex *all_costs_mutex, + DelayStorage *all_tiles_storage, HashTables::HashSet *types_explored, + HashTables::HashSet *types_deferred, + HashTables::HashSet *tiles_left) +{ + TbbExpandLocals locals; + + DeterministicRNG rng_copy = *rng; + FlatWireMap best_path(ctx); + HashTables::HashSet explored; + HashTables::HashSet deferred; + DelayStorage storage; + storage.max_explore_depth = all_tiles_storage->max_explore_depth; + + locals.tiles_of_type = &tiles_of_type; + locals.rng = &rng_copy; + locals.best_path = &best_path; + locals.storage = &storage; + locals.explored = &explored; + locals.deferred = &deferred; + + locals.ctx = ctx; + locals.all_costs_mutex = all_costs_mutex; + locals.all_tiles_storage = all_tiles_storage; + locals.types_explored = types_explored; + locals.types_deferred = types_deferred; + locals.tiles_left = tiles_left; + + expand_tile_type(ctx, tile_type, &locals); +} + +void Lookahead::build_lookahead(const Context *ctx, DeterministicRNG *rng) +{ + auto start = std::chrono::high_resolution_clock::now(); + + if (ctx->verbose) { + log_info("Building lookahead, first gathering input and output site wires\n"); + } + + HashTables::HashSet input_site_ports; + for (BelId bel : ctx->getBels()) { + const auto &bel_data = bel_info(ctx->chip_info, bel); + + for (IdString pin : ctx->getBelPins(bel)) { + WireId pin_wire = ctx->getBelPinWire(bel, pin); + if (pin_wire == WireId()) { + continue; + } + + PortType type = ctx->getBelPinType(bel, pin); + + if (type == PORT_IN && bel_data.category == BEL_CATEGORY_LOGIC) { + input_site_wires.emplace(TypeWireId(ctx, pin_wire), std::vector()); + } else if (type == PORT_OUT && bel_data.category == BEL_CATEGORY_LOGIC) { + output_site_wires.emplace(TypeWireId(ctx, pin_wire), OutputSiteWireCost()); + } else if (type == PORT_OUT && bel_data.category == BEL_CATEGORY_SITE_PORT) { + input_site_ports.emplace(TypeWireId(ctx, pin_wire)); + } + } + } + + if (ctx->verbose) { + log_info("Have %zu input and %zu output site wire types. 
Creating tile type samplers\n", + input_site_wires.size(), output_site_wires.size()); + } + + std::vector tiles_of_type; + tiles_of_type.resize(ctx->chip_info->tile_types.ssize()); + + std::vector indicies; + std::vector> xys; + indicies.reserve(ctx->chip_info->tiles.size()); + xys.reserve(ctx->chip_info->tiles.size()); + + for (int32_t tile_type = 0; tile_type < ctx->chip_info->tile_types.ssize(); ++tile_type) { + indicies.clear(); + xys.clear(); + + for (size_t tile = 0; tile < ctx->chip_info->tiles.size(); ++tile) { + if (ctx->chip_info->tiles[tile].type != tile_type) { + continue; + } + + std::pair xy; + ctx->get_tile_x_y(tile, &xy.first, &xy.second); + + indicies.push_back(tile); + xys.push_back(xy); + } + + auto &tile_sampler = tiles_of_type[tile_type]; + tile_sampler.divide_samples(kNumberSamples, xys); + + // Remap Sampler::indicies from 0 to number of tiles of type to + // absolute tile indicies. + for (size_t i = 0; i < indicies.size(); ++i) { + tile_sampler.indicies[i] = indicies[tile_sampler.indicies[i]]; + } + } + + if (ctx->verbose) { + log_info("Expanding input site wires\n"); + } + + // Expand backwards from each input site wire to find the cheapest + // non-site wire. + for (auto &input_pair : input_site_wires) { + expand_input_type(ctx, rng, tiles_of_type[input_pair.first.type], input_pair.first, &input_pair.second); + } + + if (ctx->verbose) { + log_info("Expanding output site wires\n"); + } + + // Expand forward from each output site wire to find the cheapest + // non-site wire. + for (auto &output_pair : output_site_wires) { + output_pair.second.cost = std::numeric_limits::max(); + expand_output_type(ctx, rng, tiles_of_type[output_pair.first.type], output_pair.first, &output_pair.second, + &site_to_site_cost); + } + for (TypeWireId input_site_port : input_site_ports) { + expand_output_type(ctx, rng, tiles_of_type[input_site_port.type], input_site_port, nullptr, &site_to_site_cost); + } + + if (ctx->verbose) { + log_info("Expanding all wire types\n"); + } + + DelayStorage all_tiles_storage; + all_tiles_storage.max_explore_depth = kInitialExploreDepth; + + // These are wire types that have been explored. + HashTables::HashSet types_explored; + + // These are wire types that have been deferred because they are trival + // copies of another wire type. These can be cheaply computed after the + // graph has been explored. + HashTables::HashSet types_deferred; + + std::vector tile_types; + HashTables::HashSet tiles_left; + tile_types.reserve(ctx->chip_info->tile_types.size()); + for (int32_t tile_type = 0; tile_type < ctx->chip_info->tile_types.ssize(); ++tile_type) { + tile_types.push_back(tile_type); + tiles_left.emplace(tile_type); + } + + FlatWireMap best_path(ctx); + + // Walk each tile type, and expand all non-site wires in the tile. + // Wires that are nodes will expand as if the node type is the first node + // in the wire. + // + // Wires that only have 1 output pip are deferred until the next loop, + // because generally those wires will get explored via another wire. + // The deferred will be expanded if this assumption doesn't hold. 
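+    // When TBB is available the tile types are expanded in parallel: each worker fills
+    // thread-local storage and merges it back under all_costs_mutex (TbbExpandLocals::copy_back);
+    // without TBB the same expansion runs serially below.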
+ bool expand_serially = true; +#if defined(NEXTPNR_USE_TBB) // Run parallely + { + std::mutex all_costs_mutex; + + expand_serially = false; + tbb::parallel_for_each(tile_types, [&](int32_t tile_type) { + expand_tile_type_parallel(ctx, tile_type, tiles_of_type, rng, &all_costs_mutex, &all_tiles_storage, + &types_explored, &types_deferred, &tiles_left); + }); + } +#else + // Supress warning that expand_tile_type_parallel if not running in + // parallel. + (void)expand_tile_type_parallel; +#endif + if (expand_serially) { + expand_tile_type_serial(ctx, tile_types, tiles_of_type, rng, &best_path, &all_tiles_storage, &types_explored, + &types_deferred, &tiles_left); + } + + // Check to see if deferred wire types were expanded. If they were not + // expanded, expand them now. If they were expanded, copy_types is + // populated with the wire types that can just copy the relevant data from + // another wire type. + for (TypeWireId wire_type : types_deferred) { + auto &type_data = ctx->chip_info->tile_types[wire_type.type]; + auto &tile_sampler = tiles_of_type[wire_type.type]; + auto &wire_data = type_data.wire_data[wire_type.index]; + + if (ctx->verbose) { + log_info("Expanding deferred wire %s in type %s (seen %zu types)\n", IdString(wire_data.name).c_str(ctx), + IdString(type_data.name).c_str(ctx), types_explored.size()); + } + + expand_deferred_routing_graph(ctx, rng, tile_sampler, wire_type, &types_explored, &all_tiles_storage, + &best_path); + } + + auto end = std::chrono::high_resolution_clock::now(); + if (ctx->verbose) { + log_info("Done with expansion, dt %02fs\n", std::chrono::duration(end - start).count()); + } + + if (kWriteLookaheadCsv) { + write_lookahead_csv(ctx, all_tiles_storage); + end = std::chrono::high_resolution_clock::now(); + if (ctx->verbose) { + log_info("Done writing data to disk, dt %02fs\n", std::chrono::duration(end - start).count()); + } + } + +#if defined(NEXTPNR_USE_TBB) // Run parallely + tbb::parallel_for_each( + all_tiles_storage.storage, + [&](const std::pair, delay_t>> &type_pair) { +#else + for (const auto &type_pair : all_tiles_storage.storage) { +#endif + cost_map.set_cost_map(ctx, type_pair.first, type_pair.second); +#if defined(NEXTPNR_USE_TBB) // Run parallely + }); +#else + } +#endif + + end = std::chrono::high_resolution_clock::now(); + if (ctx->verbose) { + log_info("build_lookahead time %.02fs\n", std::chrono::duration(end - start).count()); + } +} + +constexpr static bool kUseGzipForLookahead = false; + +static void write_message(::capnp::MallocMessageBuilder &message, const std::string &filename) +{ + kj::Array words = messageToFlatArray(message); + kj::ArrayPtr bytes = words.asBytes(); + + boost::filesystem::path temp = boost::filesystem::unique_path(); + log_info("Writing tempfile to %s\n", temp.c_str()); + + if (kUseGzipForLookahead) { + gzFile file = gzopen(temp.c_str(), "w"); + NPNR_ASSERT(file != Z_NULL); + + size_t bytes_written = 0; + int result; + while (bytes_written < bytes.size()) { + size_t bytes_remaining = bytes.size() - bytes_written; + size_t bytes_to_write = bytes_remaining; + if (bytes_to_write >= std::numeric_limits::max()) { + bytes_to_write = std::numeric_limits::max(); + } + result = gzwrite(file, &bytes[0] + bytes_written, bytes_to_write); + if (result < 0) { + break; + } + + bytes_written += result; + } + + int error; + std::string error_str; + if (result < 0) { + error_str.assign(gzerror(file, &error)); + } + NPNR_ASSERT(gzclose(file) == Z_OK); + if (bytes_written != bytes.size()) { + // Remove failed writes before 
reporting error. + boost::filesystem::remove(temp); + } + + if (result < 0) { + log_error("Failed to write lookahead, error from gzip %s\n", error_str.c_str()); + } else { + if (bytes_written != bytes.size()) { + log_error("Failed to write lookahead, wrote %d bytes, had %zu bytes\n", result, bytes.size()); + } else { + // Written, move file into place + boost::filesystem::rename(temp, filename); + } + } + } else { + { + kj::Own fs = kj::newDiskFilesystem(); + + auto path = kj::Path::parse(temp); + auto file = fs->getCurrent().openFile(path, kj::WriteMode::CREATE); + file->writeAll(bytes); + } + + boost::filesystem::rename(temp, filename); + } +} + +bool Lookahead::read_lookahead(const std::string &chipdb_hash, const std::string &filename) +{ + capnp::ReaderOptions reader_options; + reader_options.traversalLimitInWords = 32llu * 1024llu * 1024llu * 1024llu; + + if (kUseGzipForLookahead) { + gzFile file = gzopen(filename.c_str(), "r"); + if (file == Z_NULL) { + return false; + } + + std::vector output_data; + output_data.resize(4096); + std::stringstream sstream(std::ios_base::in | std::ios_base::out | std::ios_base::binary); + while (true) { + int ret = gzread(file, output_data.data(), output_data.size()); + NPNR_ASSERT(ret >= 0); + if (ret > 0) { + sstream.write((const char *)output_data.data(), ret); + NPNR_ASSERT(sstream); + } else { + NPNR_ASSERT(ret == 0); + int error; + gzerror(file, &error); + NPNR_ASSERT(error == Z_OK); + break; + } + } + + NPNR_ASSERT(gzclose(file) == Z_OK); + + sstream.seekg(0); + kj::std::StdInputStream istream(sstream); + capnp::InputStreamMessageReader message_reader(istream, reader_options); + + lookahead_storage::Lookahead::Reader lookahead = message_reader.getRoot(); + return from_reader(chipdb_hash, lookahead); + } else { + boost::iostreams::mapped_file_source file; + try { + file.open(filename.c_str()); + } catch (std::ios_base::failure &fail) { + return false; + } + + if (!file.is_open()) { + return false; + } + + const char *data = reinterpret_cast(file.data()); + const kj::ArrayPtr words = + kj::arrayPtr(reinterpret_cast(data), file.size() / sizeof(::capnp::word)); + ::capnp::FlatArrayMessageReader reader(words, reader_options); + lookahead_storage::Lookahead::Reader lookahead = reader.getRoot(); + return from_reader(chipdb_hash, lookahead); + } +} + +void Lookahead::write_lookahead(const std::string &chipdb_hash, const std::string &file) const +{ + ::capnp::MallocMessageBuilder message; + + lookahead_storage::Lookahead::Builder lookahead = message.initRoot(); + to_builder(chipdb_hash, lookahead); + write_message(message, file); +} + +void Lookahead::init(const Context *ctx, DeterministicRNG *rng) +{ + std::string lookahead_filename; + if (kUseGzipForLookahead) { + lookahead_filename = ctx->args.chipdb + ".lookahead.tgz"; + } else { + lookahead_filename = ctx->args.chipdb + ".lookahead"; + } + + std::string chipdb_hash = ctx->get_chipdb_hash(); + + if (ctx->args.rebuild_lookahead || !read_lookahead(chipdb_hash, lookahead_filename)) { + build_lookahead(ctx, rng); + if (!ctx->args.dont_write_lookahead) { + write_lookahead(chipdb_hash, lookahead_filename); + } + } +} + +static bool safe_add_i32(int32_t a, int32_t b, int32_t *out) +{ +#if defined(__GNUG__) || defined(__clang__) + // GCC and clang have had __builtin_add_overflow for a while. + return !__builtin_add_overflow(a, b, out); +#else + // MSVC and other don't have an intrinsic. Emit some more code. 
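+    // Adding a negative b can only underflow, so check a against INT32_MIN - b; adding a
+    // non-negative b can only overflow, so check a against INT32_MAX - b.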
+ bool safe_to_add; + if (b < 0) { + safe_to_add = a >= std::numeric_limits::min() - b; + } else { + safe_to_add = a <= std::numeric_limits::max() - b; + } + if (!safe_to_add) { + return false; + } + *out = a + b; + return true; +#endif +} + +static void saturating_incr(int32_t *acc, int32_t value) +{ + if (!safe_add_i32(*acc, value, acc)) { + if (value > 0) { + *acc = std::numeric_limits::max(); + } else { + *acc = std::numeric_limits::min(); + } + } +} + +#define DEBUG_LOOKUP + +delay_t Lookahead::estimateDelay(const Context *ctx, WireId src, WireId dst) const +{ +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Looking up %s to %s\n", ctx->nameOfWire(src), ctx->nameOfWire(dst)); + } +#endif + delay_t delay = 0; + + // Follow chain down, chasing wires with only 1 pip. Stop if dst is + // reached. + WireId orig_src = src; + src = follow_pip_chain_target(ctx, src, dst, &delay); + NPNR_ASSERT(delay >= 0); + if (src == WireId()) { + // This src wire is a dead end, tell router to avoid it! +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Source %s is a dead end!\n", ctx->nameOfWire(orig_src)); + } +#endif + return std::numeric_limits::max(); + } + +#ifdef DEBUG_LOOKUP + if (ctx->debug && src != orig_src) { + log_info("Moving src from %s to %s, delay = %d\n", ctx->nameOfWire(orig_src), ctx->nameOfWire(src), delay); + } +#endif + + if (src == dst) { + // Reached target already, done! + return delay; + } + + if (ctx->is_same_site(src, dst)) { + // Check for site to site direct path. + TypeWirePair pair; + + TypeWireId src_type(ctx, src); + pair.src = src_type; + + TypeWireId dst_type(ctx, dst); + pair.dst = dst_type; + + auto iter = site_to_site_cost.find(pair); + if (iter != site_to_site_cost.end()) { + NPNR_ASSERT(iter->second >= 0); + saturating_incr(&delay, iter->second); +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Found site to site direct path %s -> %s = %d\n", ctx->nameOfWire(src), ctx->nameOfWire(dst), + delay); + } +#endif + return delay; + } + } + + // At this point we know that the routing interconnect is needed, or + // the pair is unreachable. + orig_src = src; + TypeWireId src_type(ctx, src); + + // Find the first routing wire from the src_type. + auto src_iter = output_site_wires.find(src_type); + if (src_iter != output_site_wires.end()) { + NPNR_ASSERT(src_iter->second.cost >= 0); + saturating_incr(&delay, src_iter->second.cost); + src_type = src_iter->second.cheapest_route_from; + + src = canonical_wire(ctx->chip_info, src.tile, src_type.index); +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Moving src from %s to %s, delay = %d\n", ctx->nameOfWire(orig_src), ctx->nameOfWire(src), delay); + } +#endif + } + + // Make sure that the new wire is in the routing graph. + if (ctx->is_wire_in_site(src)) { +#ifdef DEBUG_LOOKUP + // We've already tested for direct site to site routing, if src cannot + // reach outside of the routing network, this path is impossible. + if (ctx->debug) { + log_warning("Failed to reach routing network for src %s, got to %s\n", ctx->nameOfWire(orig_src), + ctx->nameOfWire(src)); + } +#endif + return std::numeric_limits::max(); + } + + if (src == dst) { + // Reached target already, done! + return delay; + } + + // Find the first routing wire that reaches dst_type. + WireId orig_dst = dst; + TypeWireId dst_type(ctx, dst); + + auto dst_iter = input_site_wires.find(dst_type); + if (dst_iter == input_site_wires.end()) { + // dst_type isn't an input site wire, just add point to point delay. 
+ auto &dst_data = ctx->wire_info(dst); + if (dst_data.site != -1) { +#ifdef DEBUG_LOOKUP + // We've already tested for direct site to site routing, if dst cannot + // be reached from the routing network, this path is impossible. + if (ctx->debug) { + log_warning("Failed to reach routing network for dst %s, got to %s\n", ctx->nameOfWire(orig_dst), + ctx->nameOfWire(dst)); + } +#endif + return std::numeric_limits::max(); + } + + // Follow chain up + WireId orig_dst = dst; + (void)orig_dst; + + delay_t chain_delay; + dst = follow_pip_chain_up(ctx, dst, &chain_delay); + NPNR_ASSERT(chain_delay >= 0); + saturating_incr(&delay, chain_delay); +#ifdef DEBUG_LOOKUP + if (ctx->debug && dst != orig_dst) { + log_info("Moving dst from %s to %s, delay = %d\n", ctx->nameOfWire(orig_dst), ctx->nameOfWire(dst), delay); + } +#endif + + if (src == dst) { + // Reached target already, done! + return delay; + } + + // Both src and dst are in the routing graph, lookup approx cost to go + // from src to dst. + int32_t delay_from_map = cost_map.get_delay(ctx, src, dst); + NPNR_ASSERT(delay_from_map >= 0); + saturating_incr(&delay, delay_from_map); + +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Final delay = %d\n", delay); + } +#endif + + return delay; + } else { + // dst_type is an input site wire, try each possible routing path. + delay_t base_delay = delay; + delay_t cheapest_path = std::numeric_limits::max(); + + for (const InputSiteWireCost &input_cost : dst_iter->second) { + dst = orig_dst; + delay = base_delay; + + NPNR_ASSERT(input_cost.cost >= 0); + saturating_incr(&delay, input_cost.cost); + dst_type = input_cost.route_to; + + NPNR_ASSERT(dst_type.index != -1); + dst = canonical_wire(ctx->chip_info, dst.tile, dst_type.index); + NPNR_ASSERT(dst != WireId()); + +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Moving dst from %s to %s, delay = %d\n", ctx->nameOfWire(orig_dst), ctx->nameOfWire(dst), + delay); + } +#endif + + if (dst == src) { +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Possible delay = %d\n", delay); + } +#endif + // Reached target already, done! + cheapest_path = std::min(delay, cheapest_path); + continue; + } + + auto &dst_data = ctx->wire_info(dst); + if (dst_data.site != -1) { +#ifdef DEBUG_LOOKUP + // We've already tested for direct site to site routing, if dst cannot + // be reached from the routing network, this path is impossible. + if (ctx->debug) { + log_warning("Failed to reach routing network for dst %s, got to %s\n", ctx->nameOfWire(orig_dst), + ctx->nameOfWire(dst)); + } +#endif + continue; + } + + // Follow chain up + WireId orig_dst = dst; + (void)orig_dst; + + delay_t chain_delay; + dst = follow_pip_chain_up(ctx, dst, &chain_delay); + NPNR_ASSERT(chain_delay >= 0); + saturating_incr(&delay, chain_delay); +#ifdef DEBUG_LOOKUP + if (ctx->debug && dst != orig_dst) { + log_info("Moving dst from %s to %s, delay = %d\n", ctx->nameOfWire(orig_dst), ctx->nameOfWire(dst), + delay); + } +#endif + + if (dst == WireId()) { + // This dst wire is a dead end, don't examine it! +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Dest %s is a dead end!\n", ctx->nameOfWire(dst)); + } +#endif + continue; + } + + if (src == dst) { +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Possible delay = %d\n", delay); + } +#endif + // Reached target already, done! + cheapest_path = std::min(delay, cheapest_path); + continue; + } + + // Both src and dst are in the routing graph, lookup approx cost to go + // from src to dst. 
+ int32_t delay_from_map = cost_map.get_delay(ctx, src, dst); + NPNR_ASSERT(delay_from_map >= 0); + saturating_incr(&delay, delay_from_map); + cheapest_path = std::min(delay, cheapest_path); +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Possible delay = %d\n", delay); + } +#endif + } + +#ifdef DEBUG_LOOKUP + if (ctx->debug) { + log_info("Final delay = %d\n", delay); + } +#endif + + return cheapest_path; + } +} + +bool Lookahead::from_reader(const std::string &chipdb_hash, lookahead_storage::Lookahead::Reader reader) +{ + std::string expected_hash = reader.getChipdbHash(); + if (chipdb_hash != expected_hash) { + return false; + } + + input_site_wires.clear(); + output_site_wires.clear(); + site_to_site_cost.clear(); + + for (auto input_reader : reader.getInputSiteWires()) { + TypeWireId key(input_reader.getKey()); + + auto result = input_site_wires.emplace(key, std::vector()); + NPNR_ASSERT(result.second); + std::vector &costs = result.first->second; + auto value = input_reader.getValue(); + costs.reserve(value.size()); + for (auto cost : value) { + costs.emplace_back(InputSiteWireCost{TypeWireId(cost.getRouteTo()), cost.getCost()}); + } + } + + for (auto output_reader : reader.getOutputSiteWires()) { + TypeWireId key(output_reader.getKey()); + + auto result = output_site_wires.emplace( + key, OutputSiteWireCost{TypeWireId(output_reader.getCheapestRouteFrom()), output_reader.getCost()}); + NPNR_ASSERT(result.second); + } + + for (auto site_to_site_reader : reader.getSiteToSiteCost()) { + TypeWirePair key(site_to_site_reader.getKey()); + auto result = site_to_site_cost.emplace(key, site_to_site_reader.getCost()); + NPNR_ASSERT(result.second); + } + + cost_map.from_reader(reader.getCostMap()); + + return true; +} + +void Lookahead::to_builder(const std::string &chipdb_hash, lookahead_storage::Lookahead::Builder builder) const +{ + builder.setChipdbHash(chipdb_hash); + + auto input_out = builder.initInputSiteWires(input_site_wires.size()); + auto in = input_site_wires.begin(); + for (auto out = input_out.begin(); out != input_out.end(); ++out, ++in) { + NPNR_ASSERT(in != input_site_wires.end()); + + const TypeWireId &key = in->first; + key.to_builder(out->getKey()); + + const std::vector &costs = in->second; + auto value = out->initValue(costs.size()); + + auto value_in = costs.begin(); + for (auto value_out = value.begin(); value_out != value.end(); ++value_out, ++value_in) { + value_in->route_to.to_builder(value_out->getRouteTo()); + value_out->setCost(value_in->cost); + } + } + + auto output_out = builder.initOutputSiteWires(output_site_wires.size()); + auto out = output_site_wires.begin(); + for (auto out2 = output_out.begin(); out2 != output_out.end(); ++out, ++out2) { + NPNR_ASSERT(out != output_site_wires.end()); + + const TypeWireId &key = out->first; + key.to_builder(out2->getKey()); + + const TypeWireId &cheapest_route_from = out->second.cheapest_route_from; + cheapest_route_from.to_builder(out2->getCheapestRouteFrom()); + + out2->setCost(out->second.cost); + } + + auto site_out = builder.initSiteToSiteCost(site_to_site_cost.size()); + auto site = site_to_site_cost.begin(); + for (auto out2 = site_out.begin(); out2 != site_out.end(); ++out2, ++site) { + NPNR_ASSERT(site != site_to_site_cost.end()); + + const TypeWirePair &key = site->first; + key.to_builder(out2->getKey()); + out2->setCost(site->second); + } + + cost_map.to_builder(builder.getCostMap()); +} + +NEXTPNR_NAMESPACE_END diff --git a/fpga_interchange/lookahead.h b/fpga_interchange/lookahead.h new file mode 100644 
index 00000000..b9d352d5 --- /dev/null +++ b/fpga_interchange/lookahead.h @@ -0,0 +1,99 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef LOOKAHEAD_H +#define LOOKAHEAD_H + +#include +#include + +#include "cost_map.h" +#include "deterministic_rng.h" +#include "hash_table.h" +#include "lookahead.capnp.h" +#include "nextpnr_namespaces.h" +#include "type_wire.h" + +NEXTPNR_NAMESPACE_BEGIN + +// Lookahead is a generic routing graph lookahead builder and evaluator. +// +// The lookahead data model is structured into 3 parts: +// - Output site wires to routing network cost +// - Routing network point to point cost +// - Routing network cost to input site wires +// +// If the lookahead is invoked from a routing wire to a routing wire, only +// the point to point cost is used. +// +// If the lookahead is invoked from an output site wire to a routing wire, +// the point to point cost is computed using the cheapest output routing wire +// from the current site wire, and then the returned cost is the sum of the output +// cost plus the point to point routing network cost. +// +// If the lookahead is invoked from a routing wire to an input site wire, +// then the cost is the point to point routing cost to the cheapest input +// routing wire plus the input routing cost. +// +// If the lookahead is invoked from an output site wire to an input site wire, +// then the cost is the sum of all 3 parts. +struct Lookahead +{ + void init(const Context *, DeterministicRNG *rng); + void build_lookahead(const Context *, DeterministicRNG *rng); + + bool read_lookahead(const std::string &chipdb_hash, const std::string &file); + void write_lookahead(const std::string &chipdb_hash, const std::string &file) const; + bool from_reader(const std::string &chipdb_hash, lookahead_storage::Lookahead::Reader reader); + void to_builder(const std::string &chipdb_hash, lookahead_storage::Lookahead::Builder builder) const; + + delay_t estimateDelay(const Context *, WireId src, WireId dst) const; + + struct InputSiteWireCost + { + // This wire is the cheapest non-site wire that leads to this site + // wire. + TypeWireId route_to; + + // This is the cost from the route_to wire to the site wire in + // question. + delay_t cost; + }; + + struct OutputSiteWireCost + { + // This wire is the cheapest non-site wire that is reachable from + // this site wire. + TypeWireId cheapest_route_from; + + // This is the cost from the site wire in question to + // cheapest_route_from wire.
+ delay_t cost; + }; + + HashTables::HashMap> input_site_wires; + HashTables::HashMap output_site_wires; + HashTables::HashMap site_to_site_cost; + CostMap cost_map; +}; + +NEXTPNR_NAMESPACE_END + +#endif /* LOOKAHEAD_H */ diff --git a/fpga_interchange/main.cc b/fpga_interchange/main.cc index 958f1d95..4d331a32 100644 --- a/fpga_interchange/main.cc +++ b/fpga_interchange/main.cc @@ -55,6 +55,8 @@ po::options_description FpgaInterchangeCommandHandler::getArchOptions() specific.add_options()("netlist", po::value(), "FPGA interchange logical netlist to read"); specific.add_options()("phys", po::value(), "FPGA interchange Physical netlist to write"); specific.add_options()("package", po::value(), "Package to use"); + specific.add_options()("rebuild-lookahead", "Ignore lookahead cache and rebuild"); + specific.add_options()("dont-write-lookahead", "Don't write the lookahead file"); return specific; } @@ -72,6 +74,9 @@ std::unique_ptr FpgaInterchangeCommandHandler::createContext(std::unord auto start = std::chrono::high_resolution_clock::now(); ArchArgs chipArgs; + chipArgs.rebuild_lookahead = vm.count("rebuild-lookahead") != 0; + chipArgs.dont_write_lookahead = vm.count("dont-write-lookahead") != 0; + if (!vm.count("chipdb")) { log_error("chip database binary must be provided\n"); } diff --git a/fpga_interchange/sampler.cc b/fpga_interchange/sampler.cc new file mode 100644 index 00000000..0868867e --- /dev/null +++ b/fpga_interchange/sampler.cc @@ -0,0 +1,174 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "sampler.h" +#include +#include +#include + +NEXTPNR_NAMESPACE_BEGIN + +static size_t partition_x(std::vector::iterator begin, std::vector::iterator end, + const std::vector> &samples) +{ + if (std::distance(begin, end) == 0) { + return 0; + } + + // Find the median x value. + std::vector xs; + xs.reserve(std::distance(begin, end)); + + for (auto iter = begin; iter != end; ++iter) { + xs.push_back(samples[*iter].first); + } + + std::sort(xs.begin(), xs.end()); + xs.erase(std::unique(xs.begin(), xs.end()), xs.end()); + + // Partition on the median x value (e.g. 50% of samples on one side and + // 50% of samples on the other side). + int32_t x_div = xs[(xs.size() - 1) / 2]; + + auto split = std::partition(begin, end, + [x_div, &samples](size_t index) -> bool { return samples[index].first <= x_div; }); + + return std::distance(begin, split); +} + +/* Don't bother splitting when the partition has fewer than kMinSplit elements.
*/ +static constexpr ptrdiff_t kMinSplit = 20; + +static size_t partition_y(std::vector::iterator begin, std::vector::iterator end, + const std::vector> &samples) +{ + if (std::distance(begin, end) == 0) { + return 0; + } + + std::vector ys; + ys.reserve(std::distance(begin, end)); + + for (auto iter = begin; iter != end; ++iter) { + ys.push_back(samples[*iter].second); + } + + std::sort(ys.begin(), ys.end()); + ys.erase(std::unique(ys.begin(), ys.end()), ys.end()); + + int32_t y_div = ys[(ys.size() - 1) / 2]; + + auto split = std::partition(begin, end, + [y_div, &samples](size_t index) -> bool { return samples[index].second <= y_div; }); + + return std::distance(begin, split); + } + +static void add_split(std::vector *splits, size_t new_split) +{ + if (splits->back() < new_split) { + splits->push_back(new_split); + } else if (splits->back() != new_split) { + throw std::runtime_error("Split is not consecutive!"); + } +} + +void Sampler::divide_samples(size_t target_sample_count, const std::vector> &samples) +{ + // Initialize the indicies lookup and make 1 split with the entire sample range. + indicies.resize(samples.size()); + for (size_t i = 0; i < samples.size(); ++i) { + indicies[i] = i; + } + + splits.reserve(2); + splits.push_back(0); + splits.push_back(samples.size()); + + size_t divisions = std::ceil(std::sqrt(target_sample_count) / 2.); + if (divisions == 0) { + throw std::runtime_error("Math failure, unreachable!"); + } + + if (divisions > samples.size()) { + // Handle cases where there are few samples. + return; + } + + // Recursively split samples first 50% / 50% in x direction, and then + // 50% / 50% in y direction. Repeat until the bucket is smaller than + // kMinSplit or the samples have been divided `divisions` times. + std::vector new_splits; + for (size_t division_count = 0; division_count < divisions; ++division_count) { + new_splits.clear(); + new_splits.push_back(0); + for (size_t i = 0; i < splits.size() - 1; ++i) { + size_t split_begin = splits.at(i); + size_t split_end = splits.at(i + 1); + if (split_end > indicies.size()) { + throw std::runtime_error("split_end is not valid!"); + } + if (split_begin >= split_end) { + throw std::runtime_error("Invalid split from earlier pass!"); + } + + std::vector::iterator begin = indicies.begin() + split_begin; + std::vector::iterator end = indicies.begin() + split_end; + + if (std::distance(begin, end) < kMinSplit) { + add_split(&new_splits, split_begin); + continue; + } + + // Try to split samples 50/50 in x direction. + size_t split = partition_x(begin, end, samples); + // Try to split samples 50/50 in y direction after the x split. + size_t split_y1 = partition_y(begin, begin + split, samples); + size_t split_y2 = partition_y(begin + split, end, samples); + + // Because the y2 split starts at split, add it here.
+ split_y2 += split; + + add_split(&new_splits, split_begin); + add_split(&new_splits, split_begin + split_y1); + add_split(&new_splits, split_begin + split); + add_split(&new_splits, split_begin + split_y2); + } + + add_split(&new_splits, samples.size()); + + if (new_splits.front() != 0) { + throw std::runtime_error("Split must start at 0"); + } + if (new_splits.back() != samples.size()) { + throw std::runtime_error("Split must end at last element"); + } + + for (size_t i = 0; i < new_splits.size() - 1; ++i) { + if (new_splits[i] >= new_splits[i + 1]) { + throw std::runtime_error("Split indices must be increasing"); + } + } + + std::swap(splits, new_splits); + } +} + +NEXTPNR_NAMESPACE_END diff --git a/fpga_interchange/sampler.h b/fpga_interchange/sampler.h new file mode 100644 index 00000000..e8475d45 --- /dev/null +++ b/fpga_interchange/sampler.h @@ -0,0 +1,69 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#ifndef SAMPLER_H_ +#define SAMPLER_H_ + +#include +#include +#include +#include + +#include "nextpnr_namespaces.h" + +NEXTPNR_NAMESPACE_BEGIN + +// Given a set of coordinates, generates random samples that are geometrically +// distributed. +struct Sampler +{ + + void divide_samples(size_t target_sample_count, const std::vector> &samples); + + size_t number_of_regions() const { return splits.size() - 1; } + + size_t get_sample_from_region(size_t region, std::function rng) const + { + if (region >= (splits.size() - 1)) { + throw std::runtime_error("region out of range"); + } + size_t split_begin = splits[region]; + size_t split_end = splits[region + 1]; + if (split_begin == split_end) { + throw std::runtime_error("Splits should never be empty!"); + } + + // Pick a random element from that region. + return indicies[split_begin + (rng() % (split_end - split_begin))]; + } + + size_t get_sample(std::function rng) const + { + size_t region = rng() % number_of_regions(); + return get_sample_from_region(region, rng); + } + + std::vector indicies; + std::vector splits; +}; + +NEXTPNR_NAMESPACE_END + +#endif /* SAMPLER_H_ */ diff --git a/fpga_interchange/type_wire.cc b/fpga_interchange/type_wire.cc new file mode 100644 index 00000000..a08e024b --- /dev/null +++ b/fpga_interchange/type_wire.cc @@ -0,0 +1,83 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies.
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + */ + +#include "type_wire.h" +#include "nextpnr.h" + +NEXTPNR_NAMESPACE_BEGIN + +TypeWireId::TypeWireId(const Context *ctx, WireId wire_inst) +{ + NPNR_ASSERT(wire_inst != WireId()); + + if (wire_inst.tile == -1) { + auto &tile_wire = ctx->chip_info->nodes[wire_inst.index].tile_wires[0]; + type = ctx->chip_info->tiles[tile_wire.tile].type; + index = tile_wire.index; + } else { + type = ctx->chip_info->tiles[wire_inst.tile].type; + index = wire_inst.index; + } +} + +TypeWireSet::TypeWireSet(const Context *ctx, WireId wire) +{ + if (wire.tile == -1) { + const auto &node_data = ctx->chip_info->nodes[wire.index]; + wire_types_.reserve(node_data.tile_wires.size()); + for (const auto &tile_wire : node_data.tile_wires) { + wire_types_.emplace_back(); + wire_types_.back().type = ctx->chip_info->tiles[tile_wire.tile].type; + wire_types_.back().index = tile_wire.index; + } + } else { + TypeWireId wire_type(ctx, wire); + wire_types_.push_back(wire_type); + } + + std::sort(wire_types_.begin(), wire_types_.end()); + + hash_ = 0; + boost::hash_combine(hash_, std::hash()(wire_types_.size())); + for (const auto &wire : wire_types_) { + boost::hash_combine(hash_, std::hash()(wire)); + } +} + +TypeWireId::TypeWireId(lookahead_storage::TypeWireId::Reader reader) : type(reader.getType()), index(reader.getIndex()) +{ +} +void TypeWireId::to_builder(lookahead_storage::TypeWireId::Builder builder) const +{ + builder.setType(type); + builder.setIndex(index); +} + +TypeWirePair::TypeWirePair(lookahead_storage::TypeWirePair::Reader reader) : src(reader.getSrc()), dst(reader.getDst()) +{ +} + +void TypeWirePair::to_builder(lookahead_storage::TypeWirePair::Builder builder) const +{ + src.to_builder(builder.getSrc()); + dst.to_builder(builder.getDst()); +} + +NEXTPNR_NAMESPACE_END diff --git a/fpga_interchange/type_wire.h b/fpga_interchange/type_wire.h new file mode 100644 index 00000000..f2a675ef --- /dev/null +++ b/fpga_interchange/type_wire.h @@ -0,0 +1,111 @@ +/* + * nextpnr -- Next Generation Place and Route + * + * Copyright (C) 2021 Symbiflow Authors + * + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + */ + +#ifndef TYPE_WIRE_H +#define TYPE_WIRE_H + +#include +#include + +#include "nextpnr_namespaces.h" +#include "nextpnr_types.h" + +#include "lookahead.capnp.h" + +NEXTPNR_NAMESPACE_BEGIN + +struct Context; + +struct TypeWireId +{ + TypeWireId() : type(-1), index(-1) {} + TypeWireId(const Context *ctx, WireId wire_inst); + + explicit TypeWireId(lookahead_storage::TypeWireId::Reader reader); + void to_builder(lookahead_storage::TypeWireId::Builder builder) const; + + bool operator==(const TypeWireId &other) const { return type == other.type && index == other.index; } + bool operator!=(const TypeWireId &other) const { return type != other.type || index != other.index; } + bool operator<(const TypeWireId &other) const + { + return type < other.type || (type == other.type && index < other.index); + } + + int32_t type; + int32_t index; +}; + +struct TypeWirePair +{ + TypeWireId src; + TypeWireId dst; + + TypeWirePair() = default; + explicit TypeWirePair(lookahead_storage::TypeWirePair::Reader reader); + void to_builder(lookahead_storage::TypeWirePair::Builder builder) const; + + bool operator==(const TypeWirePair &other) const { return src == other.src && dst == other.dst; } + bool operator!=(const TypeWirePair &other) const { return src != other.src || dst != other.dst; } +}; + +struct TypeWireSet +{ + public: + TypeWireSet(const Context *ctx, WireId wire); + std::size_t hash() const { return hash_; } + + bool operator==(const TypeWireSet &other) const { return wire_types_ == other.wire_types_; } + bool operator!=(const TypeWireSet &other) const { return wire_types_ != other.wire_types_; } + + private: + std::size_t hash_; + std::vector wire_types_; +}; + +NEXTPNR_NAMESPACE_END + +template <> struct std::hash +{ + std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX TypeWireId &wire) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, std::hash()(wire.type)); + boost::hash_combine(seed, std::hash()(wire.index)); + return seed; + } +}; + +template <> struct std::hash +{ + std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX TypeWirePair &pair) const noexcept + { + std::size_t seed = 0; + boost::hash_combine(seed, std::hash()(pair.src)); + boost::hash_combine(seed, std::hash()(pair.dst)); + return seed; + } +}; + +template <> struct std::hash +{ + std::size_t operator()(const NEXTPNR_NAMESPACE_PREFIX TypeWireSet &set) const noexcept { return set.hash(); } +}; + +#endif /* TYPE_WIRE_H */
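For reference, a minimal sketch (not part of the patch) of how the three cost components described in lookahead.h compose into a single estimate. The helper name compose_lookahead and the plain additions are illustrative stand-ins; estimateDelay() in lookahead.cc is the authoritative implementation and additionally handles pip chains, dead ends, and saturating arithmetic.

#include <cstdint>

using delay_t = int32_t; // local alias for the sketch; nextpnr defines its own delay_t

// Routing wire to routing wire pays only the cost map lookup; leaving an
// output site wire adds the output cost, and entering an input site wire
// adds the input cost.
delay_t compose_lookahead(bool src_is_site_wire, bool dst_is_site_wire,
                          delay_t output_site_cost,    // output site wire -> routing network
                          delay_t point_to_point_cost, // routing network src -> dst (cost map)
                          delay_t input_site_cost)     // routing network -> input site wire
{
    delay_t total = point_to_point_cost;
    if (src_is_site_wire)
        total += output_site_cost;
    if (dst_is_site_wire)
        total += input_site_cost;
    return total;
}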
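Likewise for reference, a short usage sketch (not part of the patch) for Sampler as declared in sampler.h. The tile coordinates, the target sample count, and the rng lambda are made-up inputs, and rng64() is assumed to be provided by DeterministicRNG.

#include <cstdint>
#include <utility>
#include <vector>

#include "deterministic_rng.h"
#include "sampler.h"

NEXTPNR_NAMESPACE_BEGIN

// Split candidate tile locations into roughly equal-population regions, then
// draw one representative index per region so the samples stay geometrically
// spread out instead of clustering.
std::vector<size_t> pick_representatives(const std::vector<std::pair<int32_t, int32_t>> &tile_locations,
                                         DeterministicRNG *rng)
{
    Sampler sampler;
    sampler.divide_samples(/*target_sample_count=*/50, tile_locations);

    auto next = [&]() -> size_t { return rng->rng64(); }; // rng64() assumed from DeterministicRNG
    std::vector<size_t> picks;
    for (size_t region = 0; region < sampler.number_of_regions(); ++region)
        picks.push_back(sampler.get_sample_from_region(region, next));
    return picks;
}

NEXTPNR_NAMESPACE_END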