00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00034 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00035
00036 #include <omp.h>
00037 #include <new>
00038 #include <bits/stl_algobase.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/numericfwd.h>
00041
00042 namespace __gnu_parallel
00043 {
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 template<typename InputIterator,
00055 typename OutputIterator,
00056 typename BinaryOperation>
00057 OutputIterator
00058 parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
00059 OutputIterator result, BinaryOperation bin_op,
00060 typename std::iterator_traits
00061 <InputIterator>::value_type value)
00062 {
00063 if (begin == end)
00064 return result;
00065
00066 while (begin != end)
00067 {
00068 value = bin_op(value, *begin);
00069 *result = value;
00070 ++result;
00071 ++begin;
00072 }
00073 return result;
00074 }
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086 template<typename InputIterator,
00087 typename OutputIterator,
00088 typename BinaryOperation>
00089 OutputIterator
00090 parallel_partial_sum_linear(InputIterator begin, InputIterator end,
00091 OutputIterator result, BinaryOperation bin_op,
00092 typename std::iterator_traits
00093 <InputIterator>::difference_type n)
00094 {
00095 typedef std::iterator_traits<InputIterator> traits_type;
00096 typedef typename traits_type::value_type value_type;
00097 typedef typename traits_type::difference_type difference_type;
00098
00099 if (begin == end)
00100 return result;
00101
00102 thread_index_t num_threads =
00103 std::min<difference_type>(get_max_threads(), n - 1);
00104
00105 if (num_threads < 2)
00106 {
00107 *result = *begin;
00108 return parallel_partial_sum_basecase(
00109 begin + 1, end, result + 1, bin_op, *begin);
00110 }
00111
00112 difference_type* borders;
00113 value_type* sums;
00114
00115 const _Settings& __s = _Settings::get();
00116
00117 # pragma omp parallel num_threads(num_threads)
00118 {
00119 # pragma omp single
00120 {
00121 num_threads = omp_get_num_threads();
00122
00123 borders = new difference_type[num_threads + 2];
00124
00125 if (__s.partial_sum_dilation == 1.0f)
00126 equally_split(n, num_threads + 1, borders);
00127 else
00128 {
00129 difference_type chunk_length =
00130 ((double)n
00131 / ((double)num_threads + __s.partial_sum_dilation)),
00132 borderstart = n - num_threads * chunk_length;
00133 borders[0] = 0;
00134 for (int i = 1; i < (num_threads + 1); ++i)
00135 {
00136 borders[i] = borderstart;
00137 borderstart += chunk_length;
00138 }
00139 borders[num_threads + 1] = n;
00140 }
00141
00142 sums = static_cast<value_type*>(::operator new(sizeof(value_type)
00143 * num_threads));
00144 OutputIterator target_end;
00145 }
00146
00147 thread_index_t iam = omp_get_thread_num();
00148 if (iam == 0)
00149 {
00150 *result = *begin;
00151 parallel_partial_sum_basecase(begin + 1, begin + borders[1],
00152 result + 1, bin_op, *begin);
00153 ::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
00154 }
00155 else
00156 {
00157 ::new(&(sums[iam]))
00158 value_type(__gnu_parallel::accumulate(begin + borders[iam] + 1,
00159 begin + borders[iam + 1],
00160 *(begin + borders[iam]),
00161 bin_op,
00162 __gnu_parallel::sequential_tag()));
00163 }
00164
00165 # pragma omp barrier
00166
00167 # pragma omp single
00168 parallel_partial_sum_basecase(
00169 sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
00170
00171 # pragma omp barrier
00172
00173
00174 parallel_partial_sum_basecase(begin + borders[iam + 1],
00175 begin + borders[iam + 2],
00176 result + borders[iam + 1], bin_op,
00177 sums[iam]);
00178 }
00179
00180 ::operator delete(sums);
00181 delete[] borders;
00182
00183 return result + n;
00184 }
00185
00186
00187
00188
00189
00190
00191
00192 template<typename InputIterator,
00193 typename OutputIterator,
00194 typename BinaryOperation>
00195 OutputIterator
00196 parallel_partial_sum(InputIterator begin, InputIterator end,
00197 OutputIterator result, BinaryOperation bin_op)
00198 {
00199 _GLIBCXX_CALL(begin - end)
00200
00201 typedef std::iterator_traits<InputIterator> traits_type;
00202 typedef typename traits_type::value_type value_type;
00203 typedef typename traits_type::difference_type difference_type;
00204
00205 difference_type n = end - begin;
00206
00207 switch (_Settings::get().partial_sum_algorithm)
00208 {
00209 case LINEAR:
00210
00211 return parallel_partial_sum_linear(begin, end, result, bin_op, n);
00212 default:
00213
00214 _GLIBCXX_PARALLEL_ASSERT(0);
00215 return result + n;
00216 }
00217 }
00218 }
00219
00220 #endif