static: Add a basic threadpool

Signed-off-by: gatecat <gatecat@ds0.me>
2024-01-24 15:36:37 +01:00 · 2024-01-24 15:36:37 +01:00 · 9dcd0eff16
commit 9dcd0eff16
parent 73b7de74a5
1 changed files with 108 additions and 13 deletions
--- a/common/place/placer_static.cc
+++ b/common/place/placer_static.cc
@ -40,6 +40,14 @@
 #include "fftsg.h"
 #ifndef NEXTPNR_DISABLE_THREADS
 #include <atomic>
 #include <condition_variable>
 #include <mutex>
 #include <thread>
 #endif
 NEXTPNR_NAMESPACE_BEGIN
 using namespace StaticUtil;
@ -140,6 +148,93 @@ struct PlacerNet
    int hpwl() { return (b1.x - b0.x) + (b1.y - b0.y); }
 };
 #ifdef NEXTPNR_DISABLE_THREADS
 struct ThreadPool
 {
    ThreadPool(int){};
    void run(int N, std::function<void(int)> func)
    {
        for (int i = 0; i < N; i++)
            func(i);
    };
 };
 #else
 struct ThreadPool
 {
    ThreadPool(int thread_count)
    {
        done.resize(thread_count, false);
        for (int i = 0; i < thread_count; i++) {
            threads.emplace_back([this, i]() { this->worker(i); });
        }
    }
    std::vector<std::thread> threads;
    std::condition_variable cv_start, cv_done;
    std::mutex mutex;
    bool work_available = false;
    bool shutdown = false;
    std::vector<bool> done;
    std::function<void(int)> work;
    int work_count;
    ~ThreadPool()
    {
        {
            std::lock_guard lk(mutex);
            shutdown = true;
        }
        cv_start.notify_all();
        for (auto &t : threads)
            t.join();
    }
    void run(int N, std::function<void(int)> func)
    {
        {
            std::lock_guard lk(mutex);
            work = func;
            work_count = N;
            work_available = true;
            std::fill(done.begin(), done.end(), false);
        }
        cv_start.notify_all();
        {
            std::unique_lock lk(mutex);
            cv_done.wait(lk, [this] { return std::all_of(done.begin(), done.end(), [](bool x) { return x; }); });
            work_available = false;
        }
    }
    void worker(int idx)
    {
        while (true) {
            std::unique_lock lk(mutex);
            cv_start.wait(lk, [this, idx] { return (work_available && !done.at(idx)) || shutdown; });
            if (shutdown) {
                lk.unlock();
                break;
            } else if (work_available && !done.at(idx)) {
                int work_per_thread = (work_count + int(threads.size()) - 1) / threads.size();
                int begin = work_per_thread * idx;
                int end = std::min(work_count, work_per_thread * (idx + 1));
                lk.unlock();
                for (int j = begin; j < end; j++) {
                    work(j);
                }
                lk.lock();
                done.at(idx) = true;
                lk.unlock();
                cv_done.notify_one();
            }
        }
    }
 };
 #endif
 class StaticPlacer
 {
    Context *ctx;
@ -154,6 +249,7 @@ class StaticPlacer
    FastBels fast_bels;
    TimingAnalyser tmg;
    ThreadPool pool;
    int width, height;
    int iter = 0;
@ -626,12 +722,14 @@ class StaticPlacer
    RealPair wl_coeff{0.5f, 0.5f};
-    void update_nets(Axis axis, bool ref)
+    void update_nets(bool ref)
    {
        static constexpr float min_wirelen_force = -300.f;
-        for (auto &net : nets) {
+        pool.run(2 * nets.size(), [&](int i) {
            auto &net = nets.at(i / 2);
            auto axis = (i % 2) ? Axis::Y : Axis::X;
            if (net.skip)
-                continue;
+                return;
            net.min_exp.at(axis) = 0;
            net.x_min_exp.at(axis) = 0;
            net.max_exp.at(axis) = 0;
@ -665,7 +763,7 @@ class StaticPlacer
            }
            net.wa_wl.at(axis) =
                    (net.x_max_exp.at(axis) / net.max_exp.at(axis)) - (net.x_min_exp.at(axis) / net.min_exp.at(axis));
-        }
+        });
    }
    float wirelen_grad(CellInfo *cell, Axis axis, bool ref)
@ -709,13 +807,11 @@ class StaticPlacer
    void update_gradients(bool ref = true, bool set_prev = true, bool init_penalty = false)
    {
        // TODO: skip non-group cells more efficiently?
-        for (int group = 0; group < int(groups.size()); group++) {
+        pool.run(groups.size(), [&](int group) {
            compute_density(group, ref);
            run_fft(group);
-        }
+        });
-        for (auto axis : {Axis::X, Axis::Y}) {
+        update_nets(ref);
            update_nets(axis, ref);
        }
        // First loop: back up gradients if required; set to zero; and compute density gradient
        for (auto &cell : mcells) {
            auto &g = groups.at(cell.group);
@ -966,9 +1062,7 @@ class StaticPlacer
        log_info("Strict legalising %d cells...\n", int(to_legalise.size()));
        float pre_hpwl = system_hpwl();
        legalise_placement_strict(true);
-        for (auto axis : {Axis::X, Axis::Y}) {
+        update_nets(true);
            update_nets(axis, true);
        }
        float post_hpwl = system_hpwl();
        log_info("HPWL after legalise: %f (delta: %f)\n", post_hpwl, post_hpwl - pre_hpwl);
    }
@ -1244,7 +1338,8 @@ class StaticPlacer
    }
  public:
-    StaticPlacer(Context *ctx, PlacerStaticCfg cfg) : ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx)
+    StaticPlacer(Context *ctx, PlacerStaticCfg cfg)
            : ctx(ctx), cfg(cfg), fast_bels(ctx, true, 8), tmg(ctx), pool(ctx->setting<int>("threads", 8))
    {
        groups.resize(cfg.cell_groups.size());
    };