HAL
parallel_for_each.h
Go to the documentation of this file.
1 // MIT License
2 //
3 // Copyright (c) 2019 Ruhr University Bochum, Chair for Embedded Security. All Rights reserved.
4 // Copyright (c) 2019 Marc Fyrbiak, Sebastian Wallat, Max Hoffmann ("ORIGINAL AUTHORS"). All rights reserved.
5 // Copyright (c) 2021 Max Planck Institute for Security and Privacy. All Rights reserved.
6 // Copyright (c) 2021 Jörn Langheinrich, Julian Speith, Nils Albartus, René Walendy, Simon Klix ("ORIGINAL AUTHORS"). All Rights reserved.
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in all
16 // copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 // SOFTWARE.
25 
26 #pragma once
27 
28 #include "hal_core/defines.h"
29 
30 #include <cmath>
31 #include <functional>
32 #include <set>
33 #include <thread>
34 
35 namespace hal
36 {
37  namespace dataflow
38  {
39  namespace utils
40  {
/**
 * Call func(i) for every index i in the half-open range [begin, end), distributing the
 * indices in contiguous chunks over several threads.
 *
 * The calling thread processes the first chunk itself; the remaining chunks are handled by
 * freshly spawned std::thread workers. The function returns only after all workers have
 * been joined.
 *
 * func is shared by reference between all threads and is invoked concurrently, so it must
 * be safe to call from multiple threads at once. An exception escaping func on a worker
 * thread terminates the program (std::thread semantics); an exception on the calling
 * thread is propagated after the workers have been joined.
 *
 * @param[in] begin - First index to process (inclusive).
 * @param[in] end - Index one past the last index to process (exclusive). Nothing is done if end <= begin.
 * @param[in] func - Callable invoked as func(u32 index).
 */
template<typename R>
void parallel_for_each(u32 begin, u32 end, R func)
{
    // An empty range and an inverted range (which would wrap around in unsigned arithmetic) are both no-ops.
    if (end <= begin)
    {
        return;
    }

    const u32 count = end - begin;

    // hardware_concurrency() is only a hint and may be 0 when it cannot be determined.
    const u32 hw_threads  = std::max<u32>(1, std::thread::hardware_concurrency());
    const u32 num_threads = std::min<u32>(count, hw_threads);

    // Integer ceiling division: exact for any count, unlike a float-based std::ceil.
    const u32 steps_per_thread = count / num_threads + ((count % num_threads != 0) ? 1 : 0);

    // Processes chunk number chunk_idx, i.e. the indices
    // [begin + chunk_idx * steps_per_thread, begin + (chunk_idx + 1) * steps_per_thread) clamped to end.
    auto run_chunk = [&func, begin, count, steps_per_thread](u32 chunk_idx) {
        // Widen before multiplying so the offset cannot overflow u32.
        const unsigned long long offset = static_cast<unsigned long long>(steps_per_thread) * chunk_idx;
        if (offset >= count)
        {
            // Rounding up the chunk size can leave trailing chunks without any work.
            return;
        }

        const u32 chunk_begin = begin + static_cast<u32>(offset);
        const u32 chunk_len   = std::min<u32>(steps_per_thread, count - static_cast<u32>(offset));
        const u32 chunk_end   = chunk_begin + chunk_len;
        for (u32 i = chunk_begin; i < chunk_end; ++i)
        {
            func(i);
        }
    };

    // Joins every still-joinable worker on scope exit, including when an exception unwinds
    // this function; destroying a joinable std::thread would call std::terminate.
    struct join_guard
    {
        std::vector<std::thread>& workers;

        ~join_guard()
        {
            for (auto& worker : workers)
            {
                if (worker.joinable())
                {
                    worker.join();
                }
            }
        }
    };

    std::vector<std::thread> threads;
    join_guard guard{threads};
    threads.reserve(num_threads - 1);

    // spawn workers for chunks 1..num_threads-1; chunk 0 is handled by the calling thread
    for (u32 thread_idx = 1; thread_idx < num_threads; ++thread_idx)
    {
        threads.emplace_back(run_chunk, thread_idx);
    }

    // also do work on the calling thread
    run_chunk(0);

    // the guard's destructor waits until all workers are done
}
82 
/**
 * Apply func to every element of a vector by forwarding to the index-based
 * parallel_for_each overload over the index range [0, elements.size()).
 *
 * @param[in] elements - The vector whose elements are visited; it is only read.
 * @param[in] func - Callable invoked as func(elements[i]) for each index i.
 */
template<typename T, typename R>
void parallel_for_each(const std::vector<T>& elements, R func)
{
    // Adapter translating an index into a call on the corresponding element.
    auto visit_index = [&elements, &func](u32 idx) { func(elements[idx]); };

    parallel_for_each(0, elements.size(), visit_index);
}
88 
/**
 * Apply func to every element of an ordered set.
 *
 * The elements are first copied into a temporary vector (in the set's iteration order),
 * which is then handed to the vector overload of parallel_for_each.
 *
 * @param[in] elements - The set whose elements are visited.
 * @param[in] func - Callable invoked with each copied element.
 */
template<typename T, typename R>
void parallel_for_each(const std::set<T>& elements, R func)
{
    // Copy into a vector so the elements can be addressed by index.
    std::vector<T> snapshot(elements.begin(), elements.end());

    parallel_for_each(snapshot, func);
}
97 
/**
 * Apply func to every element of an unordered set.
 *
 * The elements are first copied into a temporary vector (in the set's iteration order),
 * which is then handed to the vector overload of parallel_for_each.
 *
 * @param[in] elements - The unordered set whose elements are visited.
 * @param[in] func - Callable invoked with each copied element.
 */
template<typename T, typename R>
void parallel_for_each(const std::unordered_set<T>& elements, R func)
{
    // Copy into a vector so the elements can be addressed by index.
    std::vector<T> snapshot(elements.begin(), elements.end());

    parallel_for_each(snapshot, func);
}
106  } // namespace utils
107  } // namespace dataflow
108 } // namespace hal
void parallel_for_each(u32 begin, u32 end, R func)
Definition: utils.py:1
quint32 u32