static: Add a basic threadpool

Signed-off-by: gatecat <gatecat@ds0.me>
This commit is contained in:
gatecat 2024-01-24 15:36:37 +01:00 committed by myrtle
parent 73b7de74a5
commit 9dcd0eff16

View File

@ -40,6 +40,14 @@
#include "fftsg.h" #include "fftsg.h"
#ifndef NEXTPNR_DISABLE_THREADS
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
#endif
NEXTPNR_NAMESPACE_BEGIN NEXTPNR_NAMESPACE_BEGIN
using namespace StaticUtil; using namespace StaticUtil;
@ -140,6 +148,93 @@ struct PlacerNet
int hpwl() { return (b1.x - b0.x) + (b1.y - b0.y); } int hpwl() { return (b1.x - b0.x) + (b1.y - b0.y); }
}; };
#ifdef NEXTPNR_DISABLE_THREADS
// Serial stand-in for the thread pool, selected when NEXTPNR_DISABLE_THREADS is defined.
// It exposes the same constructor and run() signature as the threaded variant but never
// creates a thread: all work is executed on the caller's thread.
struct ThreadPool
{
    // The requested thread count is accepted for signature compatibility and ignored.
    ThreadPool(int /*thread_count*/) {}

    // Call func(0), func(1), ..., func(N - 1) in ascending order. Does nothing when N <= 0.
    void run(int N, std::function<void(int)> func)
    {
        int index = 0;
        while (index < N) {
            func(index);
            ++index;
        }
    }
};
#else
// Fixed-size pool of worker threads for running an index loop in parallel.
//
// run(N, func) calls func(i) exactly once for every i in [0, N). The index range is cut into one
// contiguous chunk per worker (chunk size = ceil(N / workers)); worker `idx` handles chunk `idx`.
// run() blocks until every worker has reported its chunk as finished.
//
// All shared state below is guarded by `mutex`. run() is not reentrant: it must not be called from
// two threads at once, nor from inside `func`, because there is only one `work` / `done` slot.
struct ThreadPool
{
    // Start the worker threads. A requested count below 1 is raised to 1: with zero workers `done`
    // would be empty, run() would consider the job complete immediately and silently skip all work,
    // and a negative count would be turned into an enormous size by done.resize().
    ThreadPool(int thread_count)
    {
        const int worker_count = std::max(1, thread_count);
        done.resize(worker_count, false);
        threads.reserve(worker_count);
        for (int i = 0; i < worker_count; i++) {
            threads.emplace_back([this, i]() { this->worker(i); });
        }
    }

    std::vector<std::thread> threads;
    // cv_start wakes workers (new job or shutdown); cv_done wakes run() when a worker finishes.
    std::condition_variable cv_start, cv_done;
    std::mutex mutex;
    bool work_available = false; // true while a job posted by run() is in flight
    bool shutdown = false;       // set once by the destructor to make workers exit
    std::vector<bool> done;      // done[idx]: worker idx has finished its chunk of the current job
    std::function<void(int)> work; // callable of the current job
    int work_count = 0;            // N of the current job

    // Ask all workers to exit and wait for them. Workers only observe `shutdown` while waiting
    // for a job, so a chunk that is already running is completed first.
    ~ThreadPool()
    {
        {
            std::lock_guard lk(mutex);
            shutdown = true;
        }
        cv_start.notify_all();
        for (auto &t : threads)
            t.join();
    }

    // Execute func(i) for i in [0, N) across the workers and return once all of them are done.
    // Nothing is called when N <= 0. `func` is invoked concurrently from several threads, so it
    // must be safe to call in parallel for distinct indices.
    void run(int N, std::function<void(int)> func)
    {
        {
            // Publish the job and reset the per-worker completion flags atomically.
            std::lock_guard lk(mutex);
            work = func;
            work_count = N;
            work_available = true;
            std::fill(done.begin(), done.end(), false);
        }
        cv_start.notify_all();
        {
            std::unique_lock lk(mutex);
            cv_done.wait(lk, [this] { return std::all_of(done.begin(), done.end(), [](bool x) { return x; }); });
            // Retire the job while still holding the lock so no worker can pick it up again.
            work_available = false;
        }
    }

    // Body of worker thread `idx`: repeatedly wait for a job, run this worker's chunk, report back.
    void worker(int idx)
    {
        while (true) {
            std::unique_lock lk(mutex);
            cv_start.wait(lk, [this, idx] { return (work_available && !done.at(idx)) || shutdown; });
            if (shutdown)
                break; // lk releases the mutex as it goes out of scope

            // The wait predicate guarantees there is a job this worker has not processed yet.
            // Keep the chunk arithmetic in int so it is not silently converted to size_t.
            const int worker_count = int(threads.size());
            const int work_per_thread = (work_count + worker_count - 1) / worker_count;
            const int begin = work_per_thread * idx;
            const int end = std::min(work_count, work_per_thread * (idx + 1));

            // Run the chunk without holding the lock so the workers actually execute in parallel.
            lk.unlock();
            for (int j = begin; j < end; j++) {
                work(j);
            }

            lk.lock();
            done.at(idx) = true;
            lk.unlock();
            cv_done.notify_one();
        }
    }
};
#endif
class StaticPlacer class StaticPlacer
{ {
Context *ctx; Context *ctx;
@ -154,6 +249,7 @@ class StaticPlacer
FastBels fast_bels; FastBels fast_bels;
TimingAnalyser tmg; TimingAnalyser tmg;
ThreadPool pool;
int width, height; int width, height;
int iter = 0; int iter = 0;
@ -626,12 +722,14 @@ class StaticPlacer
RealPair wl_coeff{0.5f, 0.5f}; RealPair wl_coeff{0.5f, 0.5f};
void update_nets(Axis axis, bool ref) void update_nets(bool ref)
{ {
static constexpr float min_wirelen_force = -300.f; static constexpr float min_wirelen_force = -300.f;
for (auto &net : nets) { pool.run(2 * nets.size(), [&](int i) {
auto &net = nets.at(i / 2);
auto axis = (i % 2) ? Axis::Y : Axis::X;
if (net.skip) if (net.skip)
continue; return;
net.min_exp.at(axis) = 0; net.min_exp.at(axis) = 0;
net.x_min_exp.at(axis) = 0; net.x_min_exp.at(axis) = 0;
net.max_exp.at(axis) = 0; net.max_exp.at(axis) = 0;
@ -665,7 +763,7 @@ class StaticPlacer
} }
net.wa_wl.at(axis) = net.wa_wl.at(axis) =
(net.x_max_exp.at(axis) / net.max_exp.at(axis)) - (net.x_min_exp.at(axis) / net.min_exp.at(axis)); (net.x_max_exp.at(axis) / net.max_exp.at(axis)) - (net.x_min_exp.at(axis) / net.min_exp.at(axis));
} });
} }
float wirelen_grad(CellInfo *cell, Axis axis, bool ref) float wirelen_grad(CellInfo *cell, Axis axis, bool ref)
@ -709,13 +807,11 @@ class StaticPlacer
void update_gradients(bool ref = true, bool set_prev = true, bool init_penalty = false) void update_gradients(bool ref = true, bool set_prev = true, bool init_penalty = false)
{ {
// TODO: skip non-group cells more efficiently? // TODO: skip non-group cells more efficiently?
for (int group = 0; group < int(groups.size()); group++) { pool.run(groups.size(), [&](int group) {
compute_density(group, ref); compute_density(group, ref);
run_fft(group); run_fft(group);
} });
for (auto axis : {Axis::X, Axis::Y}) { update_nets(ref);
update_nets(axis, ref);
}
// First loop: back up gradients if required; set to zero; and compute density gradient // First loop: back up gradients if required; set to zero; and compute density gradient
for (auto &cell : mcells) { for (auto &cell : mcells) {
auto &g = groups.at(cell.group); auto &g = groups.at(cell.group);
@ -966,9 +1062,7 @@ class StaticPlacer
log_info("Strict legalising %d cells...\n", int(to_legalise.size())); log_info("Strict legalising %d cells...\n", int(to_legalise.size()));
float pre_hpwl = system_hpwl(); float pre_hpwl = system_hpwl();
legalise_placement_strict(true); legalise_placement_strict(true);
for (auto axis : {Axis::X, Axis::Y}) { update_nets(true);
update_nets(axis, true);
}
float post_hpwl = system_hpwl(); float post_hpwl = system_hpwl();
log_info("HPWL after legalise: %f (delta: %f)\n", post_hpwl, post_hpwl - pre_hpwl); log_info("HPWL after legalise: %f (delta: %f)\n", post_hpwl, post_hpwl - pre_hpwl);
} }
@ -1244,7 +1338,8 @@ class StaticPlacer
} }
public: public:
StaticPlacer(Context *ctx, PlacerStaticCfg cfg) : ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx) StaticPlacer(Context *ctx, PlacerStaticCfg cfg)
: ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx), pool(ctx->setting<int>("threads", 8))
{ {
groups.resize(cfg.cell_groups.size()); groups.resize(cfg.cell_groups.size());
}; };