00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
00033 #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
00034
00035 #include <limits>
00036 #include <bits/stl_numeric.h>
00037 #include <parallel/parallel.h>
00038 #include <parallel/random_number.h>
00039
00040 namespace __gnu_parallel
00041 {
00042
00043
00044
00045
00046
00047 typedef unsigned short bin_index;
00048
00049
00050
00051 template<typename RandomAccessIterator>
00052 struct DRandomShufflingGlobalData
00053 {
00054 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00055 typedef typename traits_type::value_type value_type;
00056 typedef typename traits_type::difference_type difference_type;
00057
00058
00059 RandomAccessIterator& source;
00060
00061
00062 value_type** temporaries;
00063
00064
00065
00066
00067 difference_type** dist;
00068
00069
00070 difference_type* starts;
00071
00072
00073
00074 thread_index_t* bin_proc;
00075
00076
00077 int num_bins;
00078
00079
00080 int num_bits;
00081
00082
00083 DRandomShufflingGlobalData(RandomAccessIterator& _source)
00084 : source(_source) { }
00085 };
00086
00087
00088
00089
00090 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00091 struct DRSSorterPU
00092 {
00093
00094 int num_threads;
00095
00096
00097 bin_index bins_begin;
00098
00099
00100 bin_index bins_end;
00101
00102
00103 uint32 seed;
00104
00105
00106 DRandomShufflingGlobalData<RandomAccessIterator>* sd;
00107 };
00108
00109
00110
00111
00112
00113 template<typename RandomNumberGenerator>
00114 inline int
00115 random_number_pow2(int logp, RandomNumberGenerator& rng)
00116 { return rng.genrand_bits(logp); }
00117
00118
00119
00120 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00121 void
00122 parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
00123 RandomNumberGenerator>* pus)
00124 {
00125 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00126 typedef typename traits_type::value_type value_type;
00127 typedef typename traits_type::difference_type difference_type;
00128
00129 thread_index_t iam = omp_get_thread_num();
00130 DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
00131 DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
00132
00133
00134 difference_type length = sd->starts[iam + 1] - sd->starts[iam];
00135 bin_index* oracles = new bin_index[length];
00136 difference_type* dist = new difference_type[sd->num_bins + 1];
00137 bin_index* bin_proc = new bin_index[sd->num_bins];
00138 value_type** temporaries = new value_type*[d->num_threads];
00139
00140
00141 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00142 dist[b] = 0;
00143 int num_bits = sd->num_bits;
00144
00145 random_number rng(d->seed);
00146
00147
00148 for (difference_type i = 0; i < length; ++i)
00149 {
00150 bin_index oracle = random_number_pow2(num_bits, rng);
00151 oracles[i] = oracle;
00152
00153
00154 ++(dist[oracle + 1]);
00155 }
00156
00157 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00158 sd->dist[b][iam + 1] = dist[b];
00159
00160 # pragma omp barrier
00161
00162 # pragma omp single
00163 {
00164
00165
00166 for (bin_index s = 0; s < sd->num_bins; ++s)
00167 __gnu_sequential::partial_sum(sd->dist[s + 1],
00168 sd->dist[s + 1] + d->num_threads + 1,
00169 sd->dist[s + 1]);
00170 }
00171
00172 # pragma omp barrier
00173
00174 sequence_index_t offset = 0, global_offset = 0;
00175 for (bin_index s = 0; s < d->bins_begin; ++s)
00176 global_offset += sd->dist[s + 1][d->num_threads];
00177
00178 # pragma omp barrier
00179
00180 for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
00181 {
00182 for (int t = 0; t < d->num_threads + 1; ++t)
00183 sd->dist[s + 1][t] += offset;
00184 offset = sd->dist[s + 1][d->num_threads];
00185 }
00186
00187 sd->temporaries[iam] = static_cast<value_type*>(
00188 ::operator new(sizeof(value_type) * offset));
00189
00190 # pragma omp barrier
00191
00192
00193 for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00194 dist[b] = sd->dist[b][iam];
00195 for (bin_index b = 0; b < sd->num_bins; ++b)
00196 bin_proc[b] = sd->bin_proc[b];
00197 for (thread_index_t t = 0; t < d->num_threads; ++t)
00198 temporaries[t] = sd->temporaries[t];
00199
00200 RandomAccessIterator source = sd->source;
00201 difference_type start = sd->starts[iam];
00202
00203
00204 for (difference_type i = 0; i < length; ++i)
00205 {
00206 bin_index target_bin = oracles[i];
00207 thread_index_t target_p = bin_proc[target_bin];
00208
00209
00210 ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
00211 value_type(*(source + i + start));
00212 }
00213
00214 delete[] oracles;
00215 delete[] dist;
00216 delete[] bin_proc;
00217 delete[] temporaries;
00218
00219 # pragma omp barrier
00220
00221
00222 for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
00223 {
00224 value_type* begin =
00225 sd->temporaries[iam] +
00226 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
00227 * end =
00228 sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
00229 sequential_random_shuffle(begin, end, rng);
00230 std::copy(begin, end, sd->source + global_offset +
00231 ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
00232 }
00233
00234 ::operator delete(sd->temporaries[iam]);
00235 }
00236
00237
00238
00239 template<typename T>
00240 T
00241 round_up_to_pow2(T x)
00242 {
00243 if (x <= 1)
00244 return 1;
00245 else
00246 return (T)1 << (__log2(x - 1) + 1);
00247 }
00248
00249
00250
00251
00252
00253
00254
00255
00256 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00257 void
00258 parallel_random_shuffle_drs(RandomAccessIterator begin,
00259 RandomAccessIterator end,
00260 typename std::iterator_traits
00261 <RandomAccessIterator>::difference_type n,
00262 thread_index_t num_threads,
00263 RandomNumberGenerator& rng)
00264 {
00265 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00266 typedef typename traits_type::value_type value_type;
00267 typedef typename traits_type::difference_type difference_type;
00268
00269 _GLIBCXX_CALL(n)
00270
00271 const _Settings& __s = _Settings::get();
00272
00273 if (num_threads > n)
00274 num_threads = static_cast<thread_index_t>(n);
00275
00276 bin_index num_bins, num_bins_cache;
00277
00278 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00279
00280
00281
00282 num_bins_cache = std::max<difference_type>(
00283 1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00284 num_bins_cache = round_up_to_pow2(num_bins_cache);
00285
00286
00287
00288 num_bins = std::min<difference_type>(n, num_bins_cache);
00289
00290 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00291
00292 num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
00293 #endif
00294 num_bins = round_up_to_pow2(num_bins);
00295
00296 if (num_bins < num_bins_cache)
00297 {
00298 #endif
00299
00300
00301 num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
00302 1, n / (__s.L2_cache_size / sizeof(value_type))));
00303 num_bins_cache = round_up_to_pow2(num_bins_cache);
00304
00305
00306 num_bins = static_cast<bin_index>(
00307 std::min(n, static_cast<difference_type>(num_bins_cache)));
00308
00309 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00310
00311 num_bins = std::min(
00312 static_cast<difference_type>(__s.TLB_size / 2), num_bins);
00313 #endif
00314 num_bins = round_up_to_pow2(num_bins);
00315 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00316 }
00317 #endif
00318
00319 num_threads = std::min<bin_index>(num_threads, num_bins);
00320
00321 if (num_threads <= 1)
00322 return sequential_random_shuffle(begin, end, rng);
00323
00324 DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
00325 DRSSorterPU<RandomAccessIterator, random_number >* pus;
00326 difference_type* starts;
00327
00328 # pragma omp parallel num_threads(num_threads)
00329 {
00330 thread_index_t num_threads = omp_get_num_threads();
00331 # pragma omp single
00332 {
00333 pus = new DRSSorterPU<RandomAccessIterator, random_number>
00334 [num_threads];
00335
00336 sd.temporaries = new value_type*[num_threads];
00337 sd.dist = new difference_type*[num_bins + 1];
00338 sd.bin_proc = new thread_index_t[num_bins];
00339 for (bin_index b = 0; b < num_bins + 1; ++b)
00340 sd.dist[b] = new difference_type[num_threads + 1];
00341 for (bin_index b = 0; b < (num_bins + 1); ++b)
00342 {
00343 sd.dist[0][0] = 0;
00344 sd.dist[b][0] = 0;
00345 }
00346 starts = sd.starts = new difference_type[num_threads + 1];
00347 int bin_cursor = 0;
00348 sd.num_bins = num_bins;
00349 sd.num_bits = __log2(num_bins);
00350
00351 difference_type chunk_length = n / num_threads,
00352 split = n % num_threads, start = 0;
00353 difference_type bin_chunk_length = num_bins / num_threads,
00354 bin_split = num_bins % num_threads;
00355 for (thread_index_t i = 0; i < num_threads; ++i)
00356 {
00357 starts[i] = start;
00358 start += (i < split) ? (chunk_length + 1) : chunk_length;
00359 int j = pus[i].bins_begin = bin_cursor;
00360
00361
00362 bin_cursor += (i < bin_split) ?
00363 (bin_chunk_length + 1) : bin_chunk_length;
00364 pus[i].bins_end = bin_cursor;
00365 for (; j < bin_cursor; ++j)
00366 sd.bin_proc[j] = i;
00367 pus[i].num_threads = num_threads;
00368 pus[i].seed = rng(std::numeric_limits<uint32>::max());
00369 pus[i].sd = &sd;
00370 }
00371 starts[num_threads] = start;
00372 }
00373
00374 parallel_random_shuffle_drs_pu(pus);
00375 }
00376
00377 delete[] starts;
00378 delete[] sd.bin_proc;
00379 for (int s = 0; s < (num_bins + 1); ++s)
00380 delete[] sd.dist[s];
00381 delete[] sd.dist;
00382 delete[] sd.temporaries;
00383
00384 delete[] pus;
00385 }
00386
00387
00388
00389
00390
00391
00392 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00393 void
00394 sequential_random_shuffle(RandomAccessIterator begin,
00395 RandomAccessIterator end,
00396 RandomNumberGenerator& rng)
00397 {
00398 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00399 typedef typename traits_type::value_type value_type;
00400 typedef typename traits_type::difference_type difference_type;
00401
00402 difference_type n = end - begin;
00403 const _Settings& __s = _Settings::get();
00404
00405 bin_index num_bins, num_bins_cache;
00406
00407 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00408
00409 num_bins_cache =
00410 std::max<difference_type>
00411 (1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00412 num_bins_cache = round_up_to_pow2(num_bins_cache);
00413
00414
00415
00416 num_bins = std::min(n, (difference_type)num_bins_cache);
00417 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00418
00419 num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
00420 #endif
00421 num_bins = round_up_to_pow2(num_bins);
00422
00423 if (num_bins < num_bins_cache)
00424 {
00425 #endif
00426
00427 num_bins_cache =
00428 static_cast<bin_index>(std::max<difference_type>(
00429 1, n / (__s.L2_cache_size / sizeof(value_type))));
00430 num_bins_cache = round_up_to_pow2(num_bins_cache);
00431
00432
00433
00434 num_bins = static_cast<bin_index>
00435 (std::min(n, static_cast<difference_type>(num_bins_cache)));
00436
00437 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00438
00439 num_bins =
00440 std::min<difference_type>(__s.TLB_size / 2, num_bins);
00441 #endif
00442 num_bins = round_up_to_pow2(num_bins);
00443 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00444 }
00445 #endif
00446
00447 int num_bits = __log2(num_bins);
00448
00449 if (num_bins > 1)
00450 {
00451 value_type* target = static_cast<value_type*>(
00452 ::operator new(sizeof(value_type) * n));
00453 bin_index* oracles = new bin_index[n];
00454 difference_type* dist0 = new difference_type[num_bins + 1],
00455 * dist1 = new difference_type[num_bins + 1];
00456
00457 for (int b = 0; b < num_bins + 1; ++b)
00458 dist0[b] = 0;
00459
00460 random_number bitrng(rng(0xFFFFFFFF));
00461
00462 for (difference_type i = 0; i < n; ++i)
00463 {
00464 bin_index oracle = random_number_pow2(num_bits, bitrng);
00465 oracles[i] = oracle;
00466
00467
00468 ++(dist0[oracle + 1]);
00469 }
00470
00471
00472 __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
00473
00474 for (int b = 0; b < num_bins + 1; ++b)
00475 dist1[b] = dist0[b];
00476
00477
00478 for (difference_type i = 0; i < n; ++i)
00479 ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
00480
00481 for (int b = 0; b < num_bins; ++b)
00482 {
00483 sequential_random_shuffle(target + dist1[b],
00484 target + dist1[b + 1],
00485 rng);
00486 }
00487
00488
00489 std::copy(target, target + n, begin);
00490
00491 delete[] dist0;
00492 delete[] dist1;
00493 delete[] oracles;
00494 ::operator delete(target);
00495 }
00496 else
00497 __gnu_sequential::random_shuffle(begin, end, rng);
00498 }
00499
00500
00501
00502
00503
00504
00505 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00506 inline void
00507 parallel_random_shuffle(RandomAccessIterator begin,
00508 RandomAccessIterator end,
00509 RandomNumberGenerator rng = random_number())
00510 {
00511 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00512 typedef typename traits_type::difference_type difference_type;
00513 difference_type n = end - begin;
00514 parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
00515 }
00516
00517 }
00518
00519 #endif