multiseq_selection.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 3, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file parallel/multiseq_selection.h
00026  *  @brief Functions to find elements of a certain global rank in
00027  *  multiple sorted sequences.  Also serves for splitting such
00028  *  sequence sets.
00029  *
00030  *  The algorithm description can be found in 
00031  *
00032  *  P. J. Varman, S. D. Scheufler, B. R. Iyer, and G. R. Ricard.
00033  *  Merging Multiple Lists on Hierarchical-Memory Multiprocessors.
00034  *  Journal of Parallel and Distributed Computing, 12(2):171–177, 1991.
00035  *
00036  *  This file is a GNU parallel extension to the Standard C++ Library.
00037  */
00038 
00039 // Written by Johannes Singler.
00040 
00041 #ifndef _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H
00042 #define _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H 1
00043 
00044 #include <vector>
00045 #include <queue>
00046 
00047 #include <bits/stl_algo.h>
00048 
00049 #include <parallel/sort.h>
00050 
00051 namespace __gnu_parallel
00052 {
00053   /** @brief Compare a pair of types lexicographically, ascending. */
00054   template<typename T1, typename T2, typename Comparator>
00055     class lexicographic
00056     : public std::binary_function<std::pair<T1, T2>, std::pair<T1, T2>, bool>
00057     {
00058     private:
00059       Comparator& comp;
00060 
00061     public:
00062       lexicographic(Comparator& _comp) : comp(_comp) { }
00063 
00064       bool
00065       operator()(const std::pair<T1, T2>& p1,
00066          const std::pair<T1, T2>& p2) const
00067       {
00068     if (comp(p1.first, p2.first))
00069       return true;
00070 
00071     if (comp(p2.first, p1.first))
00072       return false;
00073 
00074     // Firsts are equal.
00075     return p1.second < p2.second;
00076       }
00077     };
00078 
00079   /** @brief Compare a pair of types lexicographically, descending. */
00080   template<typename T1, typename T2, typename Comparator>
00081     class lexicographic_reverse : public std::binary_function<T1, T2, bool>
00082     {
00083     private:
00084       Comparator& comp;
00085 
00086     public:
00087       lexicographic_reverse(Comparator& _comp) : comp(_comp) { }
00088 
00089       bool
00090       operator()(const std::pair<T1, T2>& p1,
00091          const std::pair<T1, T2>& p2) const
00092       {
00093     if (comp(p2.first, p1.first))
00094       return true;
00095 
00096     if (comp(p1.first, p2.first))
00097       return false;
00098 
00099     // Firsts are equal.
00100     return p2.second < p1.second;
00101       }
00102     };
00103 
00104   /** 
00105    *  @brief Splits several sorted sequences at a certain global rank,
00106    *  resulting in a splitting point for each sequence.
00107    *  The sequences are passed via a sequence of random-access
00108    *  iterator pairs, none of the sequences may be empty.  If there
00109    *  are several equal elements across the split, the ones on the
00110    *  left side will be chosen from sequences with smaller number.
00111    *  @param begin_seqs Begin of the sequence of iterator pairs.
00112    *  @param end_seqs End of the sequence of iterator pairs.
00113    *  @param rank The global rank to partition at.
00114    *  @param begin_offsets A random-access sequence begin where the
00115    *  result will be stored in. Each element of the sequence is an
00116    *  iterator that points to the first element on the greater part of
00117    *  the respective sequence.
00118    *  @param comp The ordering functor, defaults to std::less<T>. 
00119    */
00120   template<typename RanSeqs, typename RankType, typename RankIterator,
00121             typename Comparator>
00122     void
00123     multiseq_partition(RanSeqs begin_seqs, RanSeqs end_seqs,
00124                        RankType rank,
00125                        RankIterator begin_offsets,
00126                        Comparator comp = std::less<
00127                        typename std::iterator_traits<typename
00128                        std::iterator_traits<RanSeqs>::value_type::
00129                        first_type>::value_type>()) // std::less<T>
00130     {
00131       _GLIBCXX_CALL(end_seqs - begin_seqs)
00132 
00133       typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
00134         It;
00135       typedef typename std::iterator_traits<It>::difference_type
00136            difference_type;
00137       typedef typename std::iterator_traits<It>::value_type value_type;
00138 
00139       lexicographic<value_type, int, Comparator> lcomp(comp);
00140       lexicographic_reverse<value_type, int, Comparator> lrcomp(comp);
00141 
00142       // Number of sequences, number of elements in total (possibly
00143       // including padding).
00144       difference_type m = std::distance(begin_seqs, end_seqs), N = 0,
00145                       nmax, n, r;
00146 
00147       for (int i = 0; i < m; i++)
00148         {
00149           N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
00150           _GLIBCXX_PARALLEL_ASSERT(
00151             std::distance(begin_seqs[i].first, begin_seqs[i].second) > 0);
00152         }
00153 
00154       if (rank == N)
00155         {
00156           for (int i = 0; i < m; i++)
00157             begin_offsets[i] = begin_seqs[i].second; // Very end.
00158           // Return m - 1;
00159           return;
00160         }
00161 
00162       _GLIBCXX_PARALLEL_ASSERT(m != 0);
00163       _GLIBCXX_PARALLEL_ASSERT(N != 0);
00164       _GLIBCXX_PARALLEL_ASSERT(rank >= 0);
00165       _GLIBCXX_PARALLEL_ASSERT(rank < N);
00166 
00167       difference_type* ns = new difference_type[m];
00168       difference_type* a = new difference_type[m];
00169       difference_type* b = new difference_type[m];
00170       difference_type l;
00171 
00172       ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
00173       nmax = ns[0];
00174       for (int i = 0; i < m; i++)
00175     {
00176       ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
00177       nmax = std::max(nmax, ns[i]);
00178     }
00179 
00180       r = __log2(nmax) + 1;
00181 
00182       // Pad all lists to this length, at least as long as any ns[i],
00183       // equality iff nmax = 2^k - 1.
00184       l = (1ULL << r) - 1;
00185 
00186       for (int i = 0; i < m; i++)
00187     {
00188       a[i] = 0;
00189       b[i] = l;
00190     }
00191       n = l / 2;
00192 
00193       // Invariants:
00194       // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
00195 
00196 #define S(i) (begin_seqs[i].first)
00197 
00198       // Initial partition.
00199       std::vector<std::pair<value_type, int> > sample;
00200 
00201       for (int i = 0; i < m; i++)
00202     if (n < ns[i])  //sequence long enough
00203       sample.push_back(std::make_pair(S(i)[n], i));
00204       __gnu_sequential::sort(sample.begin(), sample.end(), lcomp);
00205 
00206       for (int i = 0; i < m; i++)   //conceptual infinity
00207     if (n >= ns[i]) //sequence too short, conceptual infinity
00208       sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
00209 
00210       difference_type localrank = rank / l;
00211 
00212       int j;
00213       for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
00214     a[sample[j].second] += n + 1;
00215       for (; j < m; j++)
00216     b[sample[j].second] -= n + 1;
00217       
00218       // Further refinement.
00219       while (n > 0)
00220     {
00221       n /= 2;
00222 
00223       int lmax_seq = -1;    // to avoid warning
00224       const value_type* lmax = NULL; // impossible to avoid the warning?
00225       for (int i = 0; i < m; i++)
00226         {
00227           if (a[i] > 0)
00228         {
00229           if (!lmax)
00230             {
00231               lmax = &(S(i)[a[i] - 1]);
00232               lmax_seq = i;
00233             }
00234           else
00235             {
00236               // Max, favor rear sequences.
00237               if (!comp(S(i)[a[i] - 1], *lmax))
00238             {
00239               lmax = &(S(i)[a[i] - 1]);
00240               lmax_seq = i;
00241             }
00242             }
00243         }
00244         }
00245 
00246       int i;
00247       for (i = 0; i < m; i++)
00248         {
00249           difference_type middle = (b[i] + a[i]) / 2;
00250           if (lmax && middle < ns[i] &&
00251           lcomp(std::make_pair(S(i)[middle], i),
00252             std::make_pair(*lmax, lmax_seq)))
00253         a[i] = std::min(a[i] + n + 1, ns[i]);
00254           else
00255         b[i] -= n + 1;
00256         }
00257 
00258       difference_type leftsize = 0;
00259       for (int i = 0; i < m; i++)
00260           leftsize += a[i] / (n + 1);
00261       
00262       difference_type skew = rank / (n + 1) - leftsize;
00263 
00264       if (skew > 0)
00265         {
00266           // Move to the left, find smallest.
00267           std::priority_queue<std::pair<value_type, int>,
00268         std::vector<std::pair<value_type, int> >,
00269         lexicographic_reverse<value_type, int, Comparator> >
00270         pq(lrcomp);
00271           
00272           for (int i = 0; i < m; i++)
00273         if (b[i] < ns[i])
00274           pq.push(std::make_pair(S(i)[b[i]], i));
00275 
00276           for (; skew != 0 && !pq.empty(); --skew)
00277         {
00278           int source = pq.top().second;
00279           pq.pop();
00280 
00281           a[source] = std::min(a[source] + n + 1, ns[source]);
00282           b[source] += n + 1;
00283 
00284           if (b[source] < ns[source])
00285             pq.push(std::make_pair(S(source)[b[source]], source));
00286         }
00287         }
00288       else if (skew < 0)
00289         {
00290           // Move to the right, find greatest.
00291           std::priority_queue<std::pair<value_type, int>,
00292         std::vector<std::pair<value_type, int> >,
00293         lexicographic<value_type, int, Comparator> > pq(lcomp);
00294 
00295           for (int i = 0; i < m; i++)
00296         if (a[i] > 0)
00297           pq.push(std::make_pair(S(i)[a[i] - 1], i));
00298 
00299           for (; skew != 0; ++skew)
00300         {
00301           int source = pq.top().second;
00302           pq.pop();
00303 
00304           a[source] -= n + 1;
00305           b[source] -= n + 1;
00306 
00307           if (a[source] > 0)
00308             pq.push(std::make_pair(S(source)[a[source] - 1], source));
00309         }
00310         }
00311     }
00312 
00313       // Postconditions:
00314       // a[i] == b[i] in most cases, except when a[i] has been clamped
00315       // because of having reached the boundary
00316 
00317       // Now return the result, calculate the offset.
00318 
00319       // Compare the keys on both edges of the border.
00320 
00321       // Maximum of left edge, minimum of right edge.
00322       value_type* maxleft = NULL;
00323       value_type* minright = NULL;
00324       for (int i = 0; i < m; i++)
00325     {
00326       if (a[i] > 0)
00327         {
00328           if (!maxleft)
00329         maxleft = &(S(i)[a[i] - 1]);
00330           else
00331         {
00332           // Max, favor rear sequences.
00333           if (!comp(S(i)[a[i] - 1], *maxleft))
00334             maxleft = &(S(i)[a[i] - 1]);
00335         }
00336         }
00337       if (b[i] < ns[i])
00338         {
00339           if (!minright)
00340         minright = &(S(i)[b[i]]);
00341           else
00342         {
00343           // Min, favor fore sequences.
00344           if (comp(S(i)[b[i]], *minright))
00345             minright = &(S(i)[b[i]]);
00346         }
00347         }
00348     }
00349 
00350       int seq = 0;
00351       for (int i = 0; i < m; i++)
00352     begin_offsets[i] = S(i) + a[i];
00353 
00354       delete[] ns;
00355       delete[] a;
00356       delete[] b;
00357     }
00358 
00359 
00360   /** 
00361    *  @brief Selects the element at a certain global rank from several
00362    *  sorted sequences.
00363    *
00364    *  The sequences are passed via a sequence of random-access
00365    *  iterator pairs, none of the sequences may be empty.
00366    *  @param begin_seqs Begin of the sequence of iterator pairs.
00367    *  @param end_seqs End of the sequence of iterator pairs.
00368    *  @param rank The global rank to partition at.
00369    *  @param offset The rank of the selected element in the global
00370    *  subsequence of elements equal to the selected element. If the
00371    *  selected element is unique, this number is 0.
00372    *  @param comp The ordering functor, defaults to std::less. 
00373    */
00374   template<typename T, typename RanSeqs, typename RankType,
00375        typename Comparator>
00376     T
00377     multiseq_selection(RanSeqs begin_seqs, RanSeqs end_seqs, RankType rank,
00378                RankType& offset, Comparator comp = std::less<T>())
00379     {
00380       _GLIBCXX_CALL(end_seqs - begin_seqs)
00381 
00382       typedef typename std::iterator_traits<RanSeqs>::value_type::first_type
00383     It;
00384       typedef typename std::iterator_traits<It>::difference_type
00385     difference_type;
00386 
00387       lexicographic<T, int, Comparator> lcomp(comp);
00388       lexicographic_reverse<T, int, Comparator> lrcomp(comp);
00389 
00390       // Number of sequences, number of elements in total (possibly
00391       // including padding).
00392       difference_type m = std::distance(begin_seqs, end_seqs);
00393       difference_type N = 0;
00394       difference_type nmax, n, r;
00395 
00396       for (int i = 0; i < m; i++)
00397     N += std::distance(begin_seqs[i].first, begin_seqs[i].second);
00398 
00399       if (m == 0 || N == 0 || rank < 0 || rank >= N)
00400     {
00401       // Result undefined when there is no data or rank is outside bounds.
00402       throw std::exception();
00403     }
00404 
00405 
00406       difference_type* ns = new difference_type[m];
00407       difference_type* a = new difference_type[m];
00408       difference_type* b = new difference_type[m];
00409       difference_type l;
00410 
00411       ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
00412       nmax = ns[0];
00413       for (int i = 0; i < m; ++i)
00414     {
00415       ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
00416       nmax = std::max(nmax, ns[i]);
00417     }
00418 
00419       r = __log2(nmax) + 1;
00420 
00421       // Pad all lists to this length, at least as long as any ns[i],
00422       // equality iff nmax = 2^k - 1
00423       l = pow2(r) - 1;
00424 
00425       for (int i = 0; i < m; ++i)
00426     {
00427       a[i] = 0;
00428       b[i] = l;
00429     }
00430       n = l / 2;
00431 
00432       // Invariants:
00433       // 0 <= a[i] <= ns[i], 0 <= b[i] <= l
00434 
00435 #define S(i) (begin_seqs[i].first)
00436 
00437       // Initial partition.
00438       std::vector<std::pair<T, int> > sample;
00439 
00440       for (int i = 0; i < m; i++)
00441     if (n < ns[i])
00442       sample.push_back(std::make_pair(S(i)[n], i));
00443       __gnu_sequential::sort(sample.begin(), sample.end(),
00444                  lcomp, sequential_tag());
00445 
00446       // Conceptual infinity.
00447       for (int i = 0; i < m; i++)
00448     if (n >= ns[i])
00449       sample.push_back(std::make_pair(S(i)[0] /*dummy element*/, i));
00450 
00451       difference_type localrank = rank / l;
00452 
00453       int j;
00454       for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
00455     a[sample[j].second] += n + 1;
00456       for (; j < m; ++j)
00457     b[sample[j].second] -= n + 1;
00458 
00459       // Further refinement.
00460       while (n > 0)
00461     {
00462       n /= 2;
00463 
00464       const T* lmax = NULL;
00465       for (int i = 0; i < m; ++i)
00466         {
00467           if (a[i] > 0)
00468         {
00469           if (!lmax)
00470             lmax = &(S(i)[a[i] - 1]);
00471           else
00472             {
00473               if (comp(*lmax, S(i)[a[i] - 1]))  //max
00474             lmax = &(S(i)[a[i] - 1]);
00475             }
00476         }
00477         }
00478 
00479       int i;
00480       for (i = 0; i < m; i++)
00481         {
00482           difference_type middle = (b[i] + a[i]) / 2;
00483           if (lmax && middle < ns[i] && comp(S(i)[middle], *lmax))
00484         a[i] = std::min(a[i] + n + 1, ns[i]);
00485           else
00486         b[i] -= n + 1;
00487         }
00488 
00489       difference_type leftsize = 0;
00490       for (int i = 0; i < m; ++i)
00491           leftsize += a[i] / (n + 1);
00492 
00493       difference_type skew = rank / (n + 1) - leftsize;
00494 
00495       if (skew > 0)
00496         {
00497           // Move to the left, find smallest.
00498           std::priority_queue<std::pair<T, int>,
00499         std::vector<std::pair<T, int> >,
00500         lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
00501 
00502           for (int i = 0; i < m; ++i)
00503         if (b[i] < ns[i])
00504           pq.push(std::make_pair(S(i)[b[i]], i));
00505 
00506           for (; skew != 0 && !pq.empty(); --skew)
00507         {
00508           int source = pq.top().second;
00509           pq.pop();
00510           
00511           a[source] = std::min(a[source] + n + 1, ns[source]);
00512           b[source] += n + 1;
00513           
00514           if (b[source] < ns[source])
00515             pq.push(std::make_pair(S(source)[b[source]], source));
00516         }
00517         }
00518       else if (skew < 0)
00519         {
00520           // Move to the right, find greatest.
00521           std::priority_queue<std::pair<T, int>,
00522         std::vector<std::pair<T, int> >,
00523         lexicographic<T, int, Comparator> > pq(lcomp);
00524 
00525           for (int i = 0; i < m; ++i)
00526         if (a[i] > 0)
00527           pq.push(std::make_pair(S(i)[a[i] - 1], i));
00528 
00529           for (; skew != 0; ++skew)
00530         {
00531           int source = pq.top().second;
00532           pq.pop();
00533 
00534           a[source] -= n + 1;
00535           b[source] -= n + 1;
00536 
00537           if (a[source] > 0)
00538             pq.push(std::make_pair(S(source)[a[source] - 1], source));
00539         }
00540         }
00541     }
00542 
00543       // Postconditions:
00544       // a[i] == b[i] in most cases, except when a[i] has been clamped
00545       // because of having reached the boundary
00546 
00547       // Now return the result, calculate the offset.
00548 
00549       // Compare the keys on both edges of the border.
00550 
00551       // Maximum of left edge, minimum of right edge.
00552       bool maxleftset = false, minrightset = false;
00553 
00554       // Impossible to avoid the warning?
00555       T maxleft, minright;
00556       for (int i = 0; i < m; ++i)
00557     {
00558       if (a[i] > 0)
00559         {
00560           if (!maxleftset)
00561         {
00562           maxleft = S(i)[a[i] - 1];
00563           maxleftset = true;
00564         }
00565           else
00566         {
00567           // Max.
00568           if (comp(maxleft, S(i)[a[i] - 1]))
00569             maxleft = S(i)[a[i] - 1];
00570         }
00571         }
00572       if (b[i] < ns[i])
00573         {
00574           if (!minrightset)
00575         {
00576           minright = S(i)[b[i]];
00577           minrightset = true;
00578         }
00579           else
00580         {
00581           // Min.
00582           if (comp(S(i)[b[i]], minright))
00583             minright = S(i)[b[i]];
00584         }
00585         }
00586       }
00587 
00588       // Minright is the splitter, in any case.
00589 
00590       if (!maxleftset || comp(minright, maxleft))
00591     {
00592       // Good luck, everything is split unambiguously.
00593       offset = 0;
00594     }
00595       else
00596     {
00597       // We have to calculate an offset.
00598       offset = 0;
00599 
00600       for (int i = 0; i < m; ++i)
00601         {
00602           difference_type lb = std::lower_bound(S(i), S(i) + ns[i],
00603                             minright,
00604                             comp) - S(i);
00605           offset += a[i] - lb;
00606         }
00607     }
00608 
00609       delete[] ns;
00610       delete[] a;
00611       delete[] b;
00612 
00613       return minright;
00614     }
00615 }
00616 
00617 #undef S
00618 
00619 #endif /* _GLIBCXX_PARALLEL_MULTISEQ_SELECTION_H */

Generated on 19 Jun 2018 for libstdc++ by  doxygen 1.6.1