random_shuffle.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 3, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file parallel/random_shuffle.h
00026  *  @brief Parallel implementation of std::random_shuffle().
00027  *  This file is a GNU parallel extension to the Standard C++ Library.
00028  */
00029 
00030 // Written by Johannes Singler.
00031 
00032 #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
00033 #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
00034 
00035 #include <limits>
00036 #include <bits/stl_numeric.h>
00037 #include <parallel/parallel.h>
00038 #include <parallel/random_number.h>
00039 
00040 namespace __gnu_parallel
00041 {
00042 /** @brief Type to hold the index of a bin.
00043   *
00044   *  Since many variables of this type are allocated, it should be
00045   *  chosen as small as possible.
00046   */
00047 typedef unsigned short bin_index;
00048 
00049 /** @brief Data known to every thread participating in
00050     __gnu_parallel::parallel_random_shuffle(). */
00051 template<typename RandomAccessIterator>
00052   struct DRandomShufflingGlobalData
00053   {
00054     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00055     typedef typename traits_type::value_type value_type;
00056     typedef typename traits_type::difference_type difference_type;
00057 
00058     /** @brief Begin iterator of the source. */
00059     RandomAccessIterator& source;
00060 
00061     /** @brief Temporary arrays for each thread. */
00062     value_type** temporaries;
00063 
00064     /** @brief Two-dimensional array to hold the thread-bin distribution.
00065      *
00066      *  Dimensions (num_threads + 1) x (num_bins + 1). */
00067     difference_type** dist;
00068 
00069     /** @brief Start indexes of the threads' chunks. */
00070     difference_type* starts;
00071 
00072     /** @brief Number of the thread that will further process the
00073     corresponding bin. */
00074     thread_index_t* bin_proc;
00075 
00076     /** @brief Number of bins to distribute to. */
00077     int num_bins;
00078 
00079     /** @brief Number of bits needed to address the bins. */
00080     int num_bits;
00081 
00082     /** @brief Constructor. */
00083     DRandomShufflingGlobalData(RandomAccessIterator& _source)
00084     : source(_source) { }
00085   };
00086 
00087 /** @brief Local data for a thread participating in
00088     __gnu_parallel::parallel_random_shuffle().
00089   */
00090 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00091   struct DRSSorterPU
00092   {
00093     /** @brief Number of threads participating in total. */
00094     int num_threads;
00095 
00096     /** @brief Begin index for bins taken care of by this thread. */
00097     bin_index bins_begin;
00098 
00099     /** @brief End index for bins taken care of by this thread. */
00100     bin_index bins_end;
00101 
00102     /** @brief Random seed for this thread. */
00103     uint32 seed;
00104 
00105     /** @brief Pointer to global data. */
00106     DRandomShufflingGlobalData<RandomAccessIterator>* sd;
00107   };
00108 
00109 /** @brief Generate a random number in @c [0,2^logp).
00110   *  @param logp Logarithm (basis 2) of the upper range bound.
00111   *  @param rng Random number generator to use.
00112   */
00113 template<typename RandomNumberGenerator>
00114   inline int
00115   random_number_pow2(int logp, RandomNumberGenerator& rng)
00116   { return rng.genrand_bits(logp); }
00117 
00118 /** @brief Random shuffle code executed by each thread.
00119   *  @param pus Array of thread-local data records. */
00120 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00121   void 
00122   parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
00123                                  RandomNumberGenerator>* pus)
00124   {
00125     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00126     typedef typename traits_type::value_type value_type;
00127     typedef typename traits_type::difference_type difference_type;
00128 
00129     thread_index_t iam = omp_get_thread_num();
00130     DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
00131     DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
00132 
00133     // Indexing: dist[bin][processor]
00134     difference_type length = sd->starts[iam + 1] - sd->starts[iam];
00135     bin_index* oracles = new bin_index[length];
00136     difference_type* dist = new difference_type[sd->num_bins + 1];
00137     bin_index* bin_proc = new bin_index[sd->num_bins];
00138     value_type** temporaries = new value_type*[d->num_threads];
00139 
00140     // Compute oracles and count appearances.
00141     for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00142       dist[b] = 0;
00143     int num_bits = sd->num_bits;
00144 
00145     random_number rng(d->seed);
00146 
00147     // First main loop.
00148     for (difference_type i = 0; i < length; ++i)
00149       {
00150         bin_index oracle = random_number_pow2(num_bits, rng);
00151         oracles[i] = oracle;
00152 
00153         // To allow prefix (partial) sum.
00154         ++(dist[oracle + 1]);
00155       }
00156 
00157     for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00158       sd->dist[b][iam + 1] = dist[b];
00159 
00160 #   pragma omp barrier
00161 
00162 #   pragma omp single
00163     {
00164       // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
00165       // total number of items in bin s
00166       for (bin_index s = 0; s < sd->num_bins; ++s)
00167         __gnu_sequential::partial_sum(sd->dist[s + 1],
00168                                       sd->dist[s + 1] + d->num_threads + 1,
00169                                       sd->dist[s + 1]);
00170     }
00171 
00172 #   pragma omp barrier
00173 
00174     sequence_index_t offset = 0, global_offset = 0;
00175     for (bin_index s = 0; s < d->bins_begin; ++s)
00176       global_offset += sd->dist[s + 1][d->num_threads];
00177 
00178 #   pragma omp barrier
00179 
00180     for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
00181       {
00182     for (int t = 0; t < d->num_threads + 1; ++t)
00183       sd->dist[s + 1][t] += offset;
00184     offset = sd->dist[s + 1][d->num_threads];
00185       }
00186 
00187     sd->temporaries[iam] = static_cast<value_type*>(
00188       ::operator new(sizeof(value_type) * offset));
00189 
00190 #   pragma omp barrier
00191 
00192     // Draw local copies to avoid false sharing.
00193     for (bin_index b = 0; b < sd->num_bins + 1; ++b)
00194       dist[b] = sd->dist[b][iam];
00195     for (bin_index b = 0; b < sd->num_bins; ++b)
00196       bin_proc[b] = sd->bin_proc[b];
00197     for (thread_index_t t = 0; t < d->num_threads; ++t)
00198       temporaries[t] = sd->temporaries[t];
00199 
00200     RandomAccessIterator source = sd->source;
00201     difference_type start = sd->starts[iam];
00202 
00203     // Distribute according to oracles, second main loop.
00204     for (difference_type i = 0; i < length; ++i)
00205       {
00206         bin_index target_bin = oracles[i];
00207         thread_index_t target_p = bin_proc[target_bin];
00208 
00209         // Last column [d->num_threads] stays unchanged.
00210         ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
00211         value_type(*(source + i + start));
00212       }
00213 
00214     delete[] oracles;
00215     delete[] dist;
00216     delete[] bin_proc;
00217     delete[] temporaries;
00218 
00219 #   pragma omp barrier
00220 
00221     // Shuffle bins internally.
00222     for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
00223       {
00224         value_type* begin =
00225                     sd->temporaries[iam] +
00226                     ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
00227                   * end =
00228                     sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
00229         sequential_random_shuffle(begin, end, rng);
00230         std::copy(begin, end, sd->source + global_offset +
00231             ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
00232       }
00233 
00234     ::operator delete(sd->temporaries[iam]);
00235   }
00236 
00237 /** @brief Round up to the next greater power of 2.
00238   *  @param x Integer to round up */
00239 template<typename T>
00240   T 
00241   round_up_to_pow2(T x)
00242   {
00243     if (x <= 1)
00244       return 1;
00245     else
00246       return (T)1 << (__log2(x - 1) + 1);
00247   }
00248 
00249 /** @brief Main parallel random shuffle step.
00250   *  @param begin Begin iterator of sequence.
00251   *  @param end End iterator of sequence.
00252   *  @param n Length of sequence.
00253   *  @param num_threads Number of threads to use.
00254   *  @param rng Random number generator to use.
00255   */
00256 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00257   void
00258   parallel_random_shuffle_drs(RandomAccessIterator begin,
00259                   RandomAccessIterator end,
00260                   typename std::iterator_traits
00261                   <RandomAccessIterator>::difference_type n,
00262                   thread_index_t num_threads,
00263                   RandomNumberGenerator& rng)
00264   {
00265     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00266     typedef typename traits_type::value_type value_type;
00267     typedef typename traits_type::difference_type difference_type;
00268 
00269     _GLIBCXX_CALL(n)
00270 
00271     const _Settings& __s = _Settings::get();
00272 
00273     if (num_threads > n)
00274       num_threads = static_cast<thread_index_t>(n);
00275 
00276     bin_index num_bins, num_bins_cache;
00277 
00278 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00279     // Try the L1 cache first.
00280 
00281     // Must fit into L1.
00282     num_bins_cache = std::max<difference_type>(
00283         1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00284     num_bins_cache = round_up_to_pow2(num_bins_cache);
00285 
00286     // No more buckets than TLB entries, power of 2
00287     // Power of 2 and at least one element per bin, at most the TLB size.
00288     num_bins = std::min<difference_type>(n, num_bins_cache);
00289 
00290 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00291     // 2 TLB entries needed per bin.
00292     num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
00293 #endif
00294     num_bins = round_up_to_pow2(num_bins);
00295 
00296     if (num_bins < num_bins_cache)
00297       {
00298 #endif
00299         // Now try the L2 cache
00300         // Must fit into L2
00301         num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
00302             1, n / (__s.L2_cache_size / sizeof(value_type))));
00303         num_bins_cache = round_up_to_pow2(num_bins_cache);
00304 
00305         // No more buckets than TLB entries, power of 2.
00306         num_bins = static_cast<bin_index>(
00307             std::min(n, static_cast<difference_type>(num_bins_cache)));
00308         // Power of 2 and at least one element per bin, at most the TLB size.
00309 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00310         // 2 TLB entries needed per bin.
00311         num_bins = std::min(
00312             static_cast<difference_type>(__s.TLB_size / 2), num_bins);
00313 #endif
00314           num_bins = round_up_to_pow2(num_bins);
00315 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00316       }
00317 #endif
00318 
00319     num_threads = std::min<bin_index>(num_threads, num_bins);
00320 
00321     if (num_threads <= 1)
00322       return sequential_random_shuffle(begin, end, rng);
00323 
00324     DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
00325     DRSSorterPU<RandomAccessIterator, random_number >* pus;
00326     difference_type* starts;
00327 
00328 #   pragma omp parallel num_threads(num_threads)
00329       {
00330         thread_index_t num_threads = omp_get_num_threads();
00331 #       pragma omp single
00332           {
00333             pus = new DRSSorterPU<RandomAccessIterator, random_number>
00334                 [num_threads];
00335 
00336             sd.temporaries = new value_type*[num_threads];
00337             sd.dist = new difference_type*[num_bins + 1];
00338             sd.bin_proc = new thread_index_t[num_bins];
00339             for (bin_index b = 0; b < num_bins + 1; ++b)
00340               sd.dist[b] = new difference_type[num_threads + 1];
00341             for (bin_index b = 0; b < (num_bins + 1); ++b)
00342               {
00343                 sd.dist[0][0] = 0;
00344                 sd.dist[b][0] = 0;
00345               }
00346             starts = sd.starts = new difference_type[num_threads + 1];
00347             int bin_cursor = 0;
00348             sd.num_bins = num_bins;
00349             sd.num_bits = __log2(num_bins);
00350 
00351             difference_type chunk_length = n / num_threads,
00352                             split = n % num_threads, start = 0;
00353             difference_type bin_chunk_length = num_bins / num_threads,
00354                             bin_split = num_bins % num_threads;
00355             for (thread_index_t i = 0; i < num_threads; ++i)
00356               {
00357                 starts[i] = start;
00358                 start += (i < split) ? (chunk_length + 1) : chunk_length;
00359                 int j = pus[i].bins_begin = bin_cursor;
00360 
00361                 // Range of bins for this processor.
00362                 bin_cursor += (i < bin_split) ?
00363                     (bin_chunk_length + 1) : bin_chunk_length;
00364                 pus[i].bins_end = bin_cursor;
00365                 for (; j < bin_cursor; ++j)
00366                   sd.bin_proc[j] = i;
00367                 pus[i].num_threads = num_threads;
00368                 pus[i].seed = rng(std::numeric_limits<uint32>::max());
00369                 pus[i].sd = &sd;
00370               }
00371             starts[num_threads] = start;
00372           } //single
00373         // Now shuffle in parallel.
00374         parallel_random_shuffle_drs_pu(pus);
00375       }  // parallel
00376 
00377     delete[] starts;
00378     delete[] sd.bin_proc;
00379     for (int s = 0; s < (num_bins + 1); ++s)
00380       delete[] sd.dist[s];
00381     delete[] sd.dist;
00382     delete[] sd.temporaries;
00383 
00384     delete[] pus;
00385   }
00386 
00387 /** @brief Sequential cache-efficient random shuffle.
00388  *  @param begin Begin iterator of sequence.
00389  *  @param end End iterator of sequence.
00390  *  @param rng Random number generator to use.
00391  */
00392 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00393   void
00394   sequential_random_shuffle(RandomAccessIterator begin, 
00395                             RandomAccessIterator end,
00396                             RandomNumberGenerator& rng)
00397   {
00398     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00399     typedef typename traits_type::value_type value_type;
00400     typedef typename traits_type::difference_type difference_type;
00401 
00402     difference_type n = end - begin;
00403     const _Settings& __s = _Settings::get();
00404 
00405     bin_index num_bins, num_bins_cache;
00406 
00407 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00408     // Try the L1 cache first, must fit into L1.
00409     num_bins_cache =
00410         std::max<difference_type>
00411             (1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
00412     num_bins_cache = round_up_to_pow2(num_bins_cache);
00413 
00414     // No more buckets than TLB entries, power of 2
00415     // Power of 2 and at least one element per bin, at most the TLB size
00416     num_bins = std::min(n, (difference_type)num_bins_cache);
00417 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00418     // 2 TLB entries needed per bin
00419     num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
00420 #endif
00421     num_bins = round_up_to_pow2(num_bins);
00422 
00423     if (num_bins < num_bins_cache)
00424       {
00425 #endif
00426         // Now try the L2 cache, must fit into L2.
00427         num_bins_cache =
00428             static_cast<bin_index>(std::max<difference_type>(
00429                 1, n / (__s.L2_cache_size / sizeof(value_type))));
00430         num_bins_cache = round_up_to_pow2(num_bins_cache);
00431 
00432         // No more buckets than TLB entries, power of 2
00433         // Power of 2 and at least one element per bin, at most the TLB size.
00434         num_bins = static_cast<bin_index>
00435             (std::min(n, static_cast<difference_type>(num_bins_cache)));
00436 
00437 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00438         // 2 TLB entries needed per bin
00439         num_bins =
00440             std::min<difference_type>(__s.TLB_size / 2, num_bins);
00441 #endif
00442         num_bins = round_up_to_pow2(num_bins);
00443 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00444       }
00445 #endif
00446 
00447     int num_bits = __log2(num_bins);
00448 
00449     if (num_bins > 1)
00450       {
00451         value_type* target = static_cast<value_type*>(
00452           ::operator new(sizeof(value_type) * n));
00453         bin_index* oracles = new bin_index[n];
00454         difference_type* dist0 = new difference_type[num_bins + 1],
00455                        * dist1 = new difference_type[num_bins + 1];
00456 
00457         for (int b = 0; b < num_bins + 1; ++b)
00458           dist0[b] = 0;
00459 
00460         random_number bitrng(rng(0xFFFFFFFF));
00461 
00462         for (difference_type i = 0; i < n; ++i)
00463           {
00464             bin_index oracle = random_number_pow2(num_bits, bitrng);
00465             oracles[i] = oracle;
00466 
00467             // To allow prefix (partial) sum.
00468             ++(dist0[oracle + 1]);
00469           }
00470 
00471         // Sum up bins.
00472         __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
00473 
00474         for (int b = 0; b < num_bins + 1; ++b)
00475           dist1[b] = dist0[b];
00476 
00477         // Distribute according to oracles.
00478         for (difference_type i = 0; i < n; ++i)
00479           ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
00480 
00481         for (int b = 0; b < num_bins; ++b)
00482           {
00483             sequential_random_shuffle(target + dist1[b],
00484                                       target + dist1[b + 1],
00485                                       rng);
00486           }
00487 
00488         // Copy elements back.
00489         std::copy(target, target + n, begin);
00490 
00491         delete[] dist0;
00492         delete[] dist1;
00493         delete[] oracles;
00494         ::operator delete(target);
00495       }
00496     else
00497       __gnu_sequential::random_shuffle(begin, end, rng);
00498   }
00499 
00500 /** @brief Parallel random public call.
00501  *  @param begin Begin iterator of sequence.
00502  *  @param end End iterator of sequence.
00503  *  @param rng Random number generator to use.
00504  */
00505 template<typename RandomAccessIterator, typename RandomNumberGenerator>
00506   inline void
00507   parallel_random_shuffle(RandomAccessIterator begin,
00508                           RandomAccessIterator end,
00509                           RandomNumberGenerator rng = random_number())
00510   {
00511     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00512     typedef typename traits_type::difference_type difference_type;
00513     difference_type n = end - begin;
00514     parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
00515   }
00516 
00517 }
00518 
00519 #endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */

Generated on 19 Jun 2018 for libstdc++ by  doxygen 1.6.1