// This file is part of libigl, a simple c++ geometry processing library.
//
// Copyright (C) 2016 Alec Jacobson
//
// This Source Code Form is subject to the terms of the Mozilla Public License
// v. 2.0. If a copy of the MPL was not distributed with this file, You can
// obtain one at http://mozilla.org/MPL/2.0/.
#ifndef IGL_PARALLEL_FOR_H
#define IGL_PARALLEL_FOR_H
#include "igl_inline.h"
#include <functional>

//#warning "Defining IGL_PARALLEL_FOR_FORCE_SERIAL"
//#define IGL_PARALLEL_FOR_FORCE_SERIAL

namespace igl
{
  // PARALLEL_FOR Functional implementation of a basic, open-mp style, parallel
  // for loop. If the inner block of a for-loop can be rewritten/encapsulated in
  // a single (anonymous/lambda) function call `func` so that the serial code
  // looks like:
  //
  //     for(int i = 0;i<loop_size;i++)
  //     {
  //       func(i);
  //     }
  //
  // then `parallel_for(loop_size,func,min_parallel)` will use as many threads
  // as are available on the current hardware to parallelize this for loop, so
  // long as loop_size>=min_parallel; otherwise it will just use a serial for
  // loop.
  //
  // Inputs:
  //   loop_size  number of iterations. I.e. for(int i = 0;i<loop_size;i++) ...
  //   func  function handle taking the iteration index as its only argument,
  //     computing the inner block of the for loop. I.e. for(int i ...){ func(i); }
  //   min_parallel  min size of loop_size such that parallel (non-serial)
  //     thread pooling should be attempted {0}
  // Returns true iff thread pool was invoked
  template<typename Index, typename FunctionType>
  inline bool parallel_for(
    const Index loop_size,
    const FunctionType & func,
    const size_t min_parallel=0);
  // PARALLEL_FOR Functional implementation of an open-mp style, parallel for
  // loop with accumulation. For example, serial code separated into n chunks
  // (each to be parallelized with a thread) might look like:
  //
  //     Eigen::VectorXd S;
  //     double sum = 0;
  //     const auto & prep_func = [&S](int n){ S = Eigen::VectorXd::Zero(n); };
  //     const auto & func = [&X,&S](int i, int t){ S(t) += X(i); };
  //     const auto & accum_func = [&S,&sum](int t){ sum += S(t); };
  //     prep_func(n);
  //     for(int i = 0;i<loop_size;i++)
  //     {
  //       func(i,i%n);
  //     }
  //     for(int t = 0;t<n;t++)
  //     {
  //       accum_func(t);
  //     }
  //
  // Inputs:
  //   loop_size  number of iterations. I.e. for(int i = 0;i<loop_size;i++) ...
  //   prep_func  function handle taking n >= number of threads as only
  //     argument
  //   func  function handle taking iteration index i and thread id t as only
  //     arguments to compute inner block of for loop I.e.
  //     for(int i ...){ func(i,t); }
  //   accum_func  function handle taking thread index as only argument, to be
  //     called after all calls of func, e.g., for serial accumulation across
  //     all n (potential) threads, see n in description of prep_func.
  //   min_parallel  min size of loop_size such that parallel (non-serial)
  //     thread pooling should be attempted {0}
  // Returns true iff thread pool was invoked
  template<
    typename Index,
    typename PrepFunctionType,
    typename FunctionType,
    typename AccumFunctionType>
  inline bool parallel_for(
    const Index loop_size,
    const PrepFunctionType & prep_func,
    const FunctionType & func,
    const AccumFunctionType & accum_func,
    const size_t min_parallel=0);
}
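
// Example (a minimal sketch; the vector X and the threshold 1000 are
// placeholders): doubling every entry of an Eigen vector with the basic
// overload. If the loop has fewer than min_parallel iterations it runs
// serially on the calling thread.
//
//     Eigen::VectorXd X = Eigen::VectorXd::Random(1000000);
//     igl::parallel_for(
//       X.size(),
//       [&X](const Eigen::Index i){ X(i) = 2.*X(i); },
//       1000);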
// Implementation

#include <cmath>
#include <cassert>
#include <thread>
#include <vector>
#include <algorithm>

template<typename Index, typename FunctionType>
inline bool igl::parallel_for(
  const Index loop_size,
  const FunctionType & func,
  const size_t min_parallel)
{
  using namespace std;
  // no op preparation/accumulation
  const auto & no_op = [](const size_t /*n/t*/){};
  // two-parameter wrapper ignoring thread id
  const auto & wrapper = [&func](Index i,size_t /*t*/){ func(i); };
  return parallel_for(loop_size,no_op,wrapper,no_op,min_parallel);
}

template<
  typename Index,
  typename PrepFunctionType,
  typename FunctionType,
  typename AccumFunctionType>
inline bool igl::parallel_for(
  const Index loop_size,
  const PrepFunctionType & prep_func,
  const FunctionType & func,
  const AccumFunctionType & accum_func,
  const size_t min_parallel)
{
  assert(loop_size>=0);
  if(loop_size==0) return false;
  // Estimate number of threads in the pool
  // http://ideone.com/Z7zldb
  const static size_t sthc = std::thread::hardware_concurrency();
  const size_t nthreads =
#ifdef IGL_PARALLEL_FOR_FORCE_SERIAL
    0;
#else
    loop_size<(Index)min_parallel ? 0 : (sthc==0 ? 8 : sthc);
#endif
  if(nthreads==0)
  {
    // serial: single "thread" with id 0
    prep_func(1);
    for(Index i = 0;i<loop_size;i++) func(i,0);
    accum_func(0);
    return false;
  }else
  {
    // Size of a slice for the range functions
    Index slice =
      std::max(
        (Index)std::round((loop_size+1)/static_cast<double>(nthreads)),
        (Index)1);
    // [Helper] Inner loop over [k1,k2) on thread t
    const auto & range =
      [&func](const Index k1, const Index k2, const size_t t)
    {
      for(Index k = k1; k < k2; k++) func(k,t);
    };
    prep_func(nthreads);
    // Create pool and launch jobs
    std::vector<std::thread> pool;
    pool.reserve(nthreads);
    // Inner range extents
    Index i1 = 0;
    Index i2 = std::min(0 + slice, loop_size);
    {
      size_t t = 0;
      for (; t+1 < nthreads && i1 < loop_size; ++t)
      {
        pool.emplace_back(range, i1, i2, t);
        i1 = i2;
        i2 = std::min(i2 + slice, loop_size);
      }
      // Last thread takes whatever remains of the range
      if (i1 < loop_size)
      {
        pool.emplace_back(range, i1, loop_size, t);
      }
    }
    // Wait for jobs to finish
    for (std::thread &t : pool) if (t.joinable()) t.join();
    // Accumulate across threads
    for(size_t t = 0;t<nthreads;t++)
    {
      accum_func(t);
    }
    return true;
  }
}

#endif
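
// Example (a minimal sketch using the accumulation overload; X, S, sum and the
// threshold 1000 are placeholders): summing a vector's entries with one
// partial sum per thread, combined serially by accum_func after all threads
// have joined.
//
//     Eigen::VectorXd X = Eigen::VectorXd::Random(1000000);
//     Eigen::VectorXd S;
//     double sum = 0;
//     igl::parallel_for(
//       X.size(),
//       [&S](const size_t n){ S = Eigen::VectorXd::Zero(n); },
//       [&X,&S](const Eigen::Index i, const size_t t){ S(t) += X(i); },
//       [&S,&sum](const size_t t){ sum += S(t); },
//       1000);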