partition.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 3, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file parallel/partition.h
00026  *  @brief Parallel implementation of std::partition(),
00027  *  std::nth_element(), and std::partial_sort().
00028  *  This file is a GNU parallel extension to the Standard C++ Library.
00029  */
00030 
00031 // Written by Johannes Singler and Felix Putze.
00032 
00033 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
00034 #define _GLIBCXX_PARALLEL_PARTITION_H 1
00035 
00036 #include <parallel/basic_iterator.h>
00037 #include <parallel/sort.h>
00038 #include <parallel/random_number.h>
00039 #include <bits/stl_algo.h>
00040 #include <parallel/parallel.h>
00041 
00042 /** @brief Decide whether to declare certain variables volatile. */
00043 #define _GLIBCXX_VOLATILE volatile
00044 
00045 namespace __gnu_parallel
00046 {
00047 /** @brief Parallel implementation of std::partition.
00048   *  @param begin Begin iterator of input sequence to split.
00049   *  @param end End iterator of input sequence to split.
00050   *  @param pred Partition predicate, possibly including some kind of pivot.
00051   *  @param num_threads Maximum number of threads to use for this task.
00052   *  @return Number of elements not fulfilling the predicate. */
00053 template<typename RandomAccessIterator, typename Predicate>
00054   typename std::iterator_traits<RandomAccessIterator>::difference_type
00055   parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
00056                      Predicate pred, thread_index_t num_threads)
00057   {
00058     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00059     typedef typename traits_type::value_type value_type;
00060     typedef typename traits_type::difference_type difference_type;
00061 
00062     difference_type n = end - begin;
00063 
00064     _GLIBCXX_CALL(n)
00065 
00066     const _Settings& __s = _Settings::get();
00067 
00068     // Shared.
00069     _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
00070     _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
00071     _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
00072 
00073     bool* reserved_left = NULL, * reserved_right = NULL;
00074 
00075     difference_type chunk_size = __s.partition_chunk_size;
00076 
00077     omp_lock_t result_lock;
00078     omp_init_lock(&result_lock);
00079 
00080     //at least two chunks per thread
00081     if(right - left + 1 >= 2 * num_threads * chunk_size)
00082 #   pragma omp parallel num_threads(num_threads)
00083       {
00084 #       pragma omp single
00085           {
00086             num_threads = omp_get_num_threads();
00087             reserved_left = new bool[num_threads];
00088             reserved_right = new bool[num_threads];
00089 
00090             if (__s.partition_chunk_share > 0.0)
00091               chunk_size = std::max<difference_type>(__s.partition_chunk_size,
00092                     (double)n * __s.partition_chunk_share
00093                              / (double)num_threads);
00094             else
00095               chunk_size = __s.partition_chunk_size;
00096           }
00097 
00098         while (right - left + 1 >= 2 * num_threads * chunk_size)
00099           {
00100 #           pragma omp single
00101               {
00102                 difference_type num_chunks = (right - left + 1) / chunk_size;
00103 
00104                 for (int r = 0; r < num_threads; ++r)
00105                   {
00106                     reserved_left[r] = false;
00107                     reserved_right[r] = false;
00108                   }
00109                 leftover_left = 0;
00110                 leftover_right = 0;
00111               } //implicit barrier
00112 
00113             // Private.
00114             difference_type thread_left, thread_left_border,
00115                             thread_right, thread_right_border;
00116             thread_left = left + 1;
00117 
00118             // Just to satisfy the condition below.
00119             thread_left_border = thread_left - 1;
00120             thread_right = n - 1;
00121             thread_right_border = thread_right + 1;
00122 
00123             bool iam_finished = false;
00124             while (!iam_finished)
00125               {
00126                 if (thread_left > thread_left_border)
00127                   {
00128                     omp_set_lock(&result_lock);
00129                     if (left + (chunk_size - 1) > right)
00130                       iam_finished = true;
00131                     else
00132                       {
00133                         thread_left = left;
00134                         thread_left_border = left + (chunk_size - 1);
00135                         left += chunk_size;
00136                       }
00137                     omp_unset_lock(&result_lock);
00138                   }
00139 
00140                 if (thread_right < thread_right_border)
00141                   {
00142                     omp_set_lock(&result_lock);
00143                     if (left > right - (chunk_size - 1))
00144                       iam_finished = true;
00145                     else
00146                       {
00147                         thread_right = right;
00148                         thread_right_border = right - (chunk_size - 1);
00149                         right -= chunk_size;
00150                       }
00151                     omp_unset_lock(&result_lock);
00152                   }
00153 
00154                 if (iam_finished)
00155                   break;
00156 
00157                 // Swap as usual.
00158                 while (thread_left < thread_right)
00159                   {
00160                     while (pred(begin[thread_left])
00161                             && thread_left <= thread_left_border)
00162                       ++thread_left;
00163                     while (!pred(begin[thread_right])
00164                             && thread_right >= thread_right_border)
00165                       --thread_right;
00166 
00167                     if (thread_left > thread_left_border
00168                         || thread_right < thread_right_border)
00169                       // Fetch new chunk(s).
00170                       break;
00171 
00172                     std::swap(begin[thread_left], begin[thread_right]);
00173                     ++thread_left;
00174                     --thread_right;
00175                   }
00176               }
00177 
00178             // Now swap the leftover chunks to the right places.
00179             if (thread_left <= thread_left_border)
00180 #             pragma omp atomic
00181               ++leftover_left;
00182             if (thread_right >= thread_right_border)
00183 #             pragma omp atomic
00184               ++leftover_right;
00185 
00186 #           pragma omp barrier
00187 
00188 #           pragma omp single
00189               {
00190                 leftnew = left - leftover_left * chunk_size;
00191                 rightnew = right + leftover_right * chunk_size;
00192               }
00193 
00194 #           pragma omp barrier
00195 
00196             // <=> thread_left_border + (chunk_size - 1) >= leftnew
00197             if (thread_left <= thread_left_border
00198                 && thread_left_border >= leftnew)
00199               {
00200                 // Chunk already in place, reserve spot.
00201                 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
00202                     = true;
00203               }
00204 
00205             // <=> thread_right_border - (chunk_size - 1) <= rightnew
00206             if (thread_right >= thread_right_border
00207                 && thread_right_border <= rightnew)
00208               {
00209                 // Chunk already in place, reserve spot.
00210                 reserved_right[((thread_right_border - 1) - right)
00211                    / chunk_size] = true;
00212               }
00213 
00214 #           pragma omp barrier
00215 
00216             if (thread_left <= thread_left_border
00217                 && thread_left_border < leftnew)
00218               {
00219                 // Find spot and swap.
00220                 difference_type swapstart = -1;
00221                 omp_set_lock(&result_lock);
00222                 for (int r = 0; r < leftover_left; ++r)
00223                   if (!reserved_left[r])
00224                     {
00225                       reserved_left[r] = true;
00226                       swapstart = left - (r + 1) * chunk_size;
00227                       break;
00228                     }
00229                 omp_unset_lock(&result_lock);
00230 
00231 #if _GLIBCXX_ASSERTIONS
00232                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
00233 #endif
00234 
00235                 std::swap_ranges(begin + thread_left_border
00236                  - (chunk_size - 1),
00237                  begin + thread_left_border + 1,
00238                  begin + swapstart);
00239               }
00240 
00241             if (thread_right >= thread_right_border
00242                 && thread_right_border > rightnew)
00243               {
00244                 // Find spot and swap
00245                 difference_type swapstart = -1;
00246                 omp_set_lock(&result_lock);
00247                 for (int r = 0; r < leftover_right; ++r)
00248                   if (!reserved_right[r])
00249                     {
00250                       reserved_right[r] = true;
00251                       swapstart = right + r * chunk_size + 1;
00252                       break;
00253                     }
00254                 omp_unset_lock(&result_lock);
00255 
00256 #if _GLIBCXX_ASSERTIONS
00257                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
00258 #endif
00259 
00260                 std::swap_ranges(begin + thread_right_border,
00261                  begin + thread_right_border + chunk_size,
00262                  begin + swapstart);
00263               }
00264 #if _GLIBCXX_ASSERTIONS
00265 #             pragma omp barrier
00266 
00267 #             pragma omp single
00268                 {
00269                   for (int r = 0; r < leftover_left; ++r)
00270                     _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
00271                   for (int r = 0; r < leftover_right; ++r)
00272                     _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
00273                 }
00274 
00275 #             pragma omp barrier
00276 #endif
00277 
00278 #             pragma omp barrier
00279 
00280               left = leftnew;
00281               right = rightnew;
00282           }
00283 #         pragma omp flush(left, right)
00284       } // end "recursion" //parallel
00285 
00286     difference_type final_left = left, final_right = right;
00287 
00288     while (final_left < final_right)
00289       {
00290         // Go right until key is geq than pivot.
00291         while (pred(begin[final_left]) && final_left < final_right)
00292           ++final_left;
00293 
00294         // Go left until key is less than pivot.
00295         while (!pred(begin[final_right]) && final_left < final_right)
00296           --final_right;
00297 
00298         if (final_left == final_right)
00299           break;
00300         std::swap(begin[final_left], begin[final_right]);
00301         ++final_left;
00302         --final_right;
00303       }
00304 
00305     // All elements on the left side are < piv, all elements on the
00306     // right are >= piv
00307     delete[] reserved_left;
00308     delete[] reserved_right;
00309 
00310     omp_destroy_lock(&result_lock);
00311 
00312     // Element "between" final_left and final_right might not have
00313     // been regarded yet
00314     if (final_left < n && !pred(begin[final_left]))
00315       // Really swapped.
00316       return final_left;
00317     else
00318       return final_left + 1;
00319   }
00320 
00321 /**
00322   *  @brief Parallel implementation of std::nth_element().
00323   *  @param begin Begin iterator of input sequence.
00324   *  @param nth Iterator of element that must be in position afterwards.
00325   *  @param end End iterator of input sequence.
00326   *  @param comp Comparator.
00327   */
00328 template<typename RandomAccessIterator, typename Comparator>
00329   void 
00330   parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, 
00331                RandomAccessIterator end, Comparator comp)
00332   {
00333     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00334     typedef typename traits_type::value_type value_type;
00335     typedef typename traits_type::difference_type difference_type;
00336 
00337     _GLIBCXX_CALL(end - begin)
00338 
00339     RandomAccessIterator split;
00340     random_number rng;
00341 
00342     const _Settings& __s = _Settings::get();
00343     difference_type minimum_length = std::max<difference_type>(2,
00344         std::max(__s.nth_element_minimal_n, __s.partition_minimal_n));
00345 
00346     // Break if input range to small.
00347     while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
00348       {
00349         difference_type n = end - begin;
00350 
00351         RandomAccessIterator pivot_pos = begin + rng(n);
00352 
00353         // Swap pivot_pos value to end.
00354         if (pivot_pos != (end - 1))
00355           std::swap(*pivot_pos, *(end - 1));
00356         pivot_pos = end - 1;
00357 
00358         // XXX Comparator must have first_value_type, second_value_type,
00359     // result_type
00360         // Comparator == __gnu_parallel::lexicographic<S, int,
00361     // __gnu_parallel::less<S, S> >
00362         // pivot_pos == std::pair<S, int>*
00363         // XXX binder2nd only for RandomAccessIterators??
00364         __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
00365       pred(comp, *pivot_pos);
00366 
00367         // Divide, leave pivot unchanged in last place.
00368         RandomAccessIterator split_pos1, split_pos2;
00369         split_pos1 = begin + parallel_partition(begin, end - 1, pred,
00370                         get_max_threads());
00371 
00372         // Left side: < pivot_pos; right side: >= pivot_pos
00373 
00374         // Swap pivot back to middle.
00375         if (split_pos1 != pivot_pos)
00376           std::swap(*split_pos1, *pivot_pos);
00377         pivot_pos = split_pos1;
00378 
00379         // In case all elements are equal, split_pos1 == 0
00380         if ((split_pos1 + 1 - begin) < (n >> 7)
00381         || (end - split_pos1) < (n >> 7))
00382           {
00383             // Very unequal split, one part smaller than one 128th
00384             // elements not strictly larger than the pivot.
00385             __gnu_parallel::unary_negate<__gnu_parallel::
00386 	      binder1st<Comparator, value_type, value_type, bool>, value_type>
00387           pred(__gnu_parallel::binder1st<Comparator, value_type,
00388            value_type, bool>(comp, *pivot_pos));
00389 
00390             // Find other end of pivot-equal range.
00391             split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
00392                              end, pred);
00393           }
00394         else
00395           // Only skip the pivot.
00396           split_pos2 = split_pos1 + 1;
00397 
00398         // Compare iterators.
00399         if (split_pos2 <= nth)
00400           begin = split_pos2;
00401         else if (nth < split_pos1)
00402           end = split_pos1;
00403         else
00404           break;
00405       }
00406 
00407     // Only at most _Settings::partition_minimal_n elements left.
00408     __gnu_sequential::nth_element(begin, nth, end, comp);
00409   }
00410 
00411 /** @brief Parallel implementation of std::partial_sort().
00412 *  @param begin Begin iterator of input sequence.
00413 *  @param middle Sort until this position.
00414 *  @param end End iterator of input sequence.
00415 *  @param comp Comparator. */
00416 template<typename RandomAccessIterator, typename Comparator>
00417   void
00418   parallel_partial_sort(RandomAccessIterator begin,
00419             RandomAccessIterator middle,
00420             RandomAccessIterator end, Comparator comp)
00421   {
00422     parallel_nth_element(begin, middle, end, comp);
00423     std::sort(begin, middle, comp);
00424   }
00425 
00426 } //namespace __gnu_parallel
00427 
00428 #undef _GLIBCXX_VOLATILE
00429 
00430 #endif /* _GLIBCXX_PARALLEL_PARTITION_H */

Generated on 19 Jun 2018 for libstdc++ by  doxygen 1.6.1