00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
00033 #define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1
00034
00035 #include <parallel/types.h>
00036 #include <parallel/base.h>
00037
00038 #if defined(__SUNPRO_CC) && defined(__sparc)
00039 #include <sys/atomic.h>
00040 #endif
00041
00042 #if !defined(_WIN32) || defined (__CYGWIN__)
00043 #include <sched.h>
00044 #endif
00045
00046 #if defined(_MSC_VER)
00047 #include <Windows.h>
00048 #include <intrin.h>
00049 #undef max
00050 #undef min
00051 #endif
00052
00053 #ifdef __MINGW32__
00054
00055
00056
00057 extern "C"
00058 __attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
00059 #endif
00060
00061 namespace __gnu_parallel
00062 {
#if defined(__ICC)
/** @brief 32-bit atomic fetch-and-add for the Intel compiler,
 *  implemented with x86 <tt>lock xadd</tt> inline assembly.
 *
 *  The dummy template parameter keeps the asm uninstantiated unless the
 *  function is actually used.
 *  @param x Pointer to the 32-bit value to modify.
 *  @param inc Amount to add.
 *  @return Value of @c *x before the addition (xadd writes the old
 *  value back into the @c inc register operand). */
template<typename must_be_int = int>
int32 faa32(int32* x, int32 inc)
{
  asm volatile("lock xadd %0,%1"
               : "=r" (inc), "=m" (*x)
               : "0" (inc)
               : "memory");
  return inc;
}
#if defined(__x86_64)
/** @brief 64-bit atomic fetch-and-add for the Intel compiler on
 *  x86-64, implemented with <tt>lock xadd</tt> inline assembly.
 *  @param x Pointer to the 64-bit value to modify.
 *  @param inc Amount to add.
 *  @return Value of @c *x before the addition. */
template<typename must_be_int = int>
int64 faa64(int64* x, int64 inc)
{
  asm volatile("lock xadd %0,%1"
               : "=r" (inc), "=m" (*x)
               : "0" (inc)
               : "memory");
  return inc;
}
#endif
#endif
00085
00086
00087
00088
00089
00090
00091
00092
00093
/** @brief Add a value to a 32-bit variable, atomically.
 *
 *  Selects the best primitive available for the compiler/platform:
 *  compiler intrinsics, GCC @c __sync builtins, a Solaris/SPARC CAS
 *  loop, or — as a last resort — an OpenMP critical section.
 *  @param ptr Pointer to a 32-bit signed integer.
 *  @param addend Value to add.
 *  @return Value of @c *ptr before the addition. */
inline int32
fetch_and_add_32(volatile int32* ptr, int32 addend)
{
#if defined(__ICC) //x86 version
  return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ECC) //IA-64 version
  return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
  return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr),
                                 addend);
#elif defined(__GNUC__)
  return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  // No native fetch-and-add: emulate it with a compare-and-swap retry
  // loop until no other thread intervenes between read and update.
  volatile int32 before, after;
  do
    {
      before = *ptr;
      after = before + addend;
    } while (atomic_cas_32((volatile unsigned int*)ptr, before,
                           after) != before);
  return before;
#else //fallback, slow
#pragma message("slow fetch_and_add_32")
  // Fallback: serialize through an OpenMP critical section.  Only
  // atomic with respect to other code inside the same unnamed critical.
  int32 res;
#pragma omp critical
  {
    res = *ptr;
    *(ptr) += addend;
  }
  return res;
#endif
}
00126
00127
00128
00129
00130
00131
00132
/** @brief Add a value to a 64-bit variable, atomically.
 *
 *  Selects the best primitive available for the compiler/platform;
 *  mirrors fetch_and_add_32 but for 64-bit operands.
 *  @param ptr Pointer to a 64-bit signed integer.
 *  @param addend Value to add.
 *  @return Value of @c *ptr before the addition. */
inline int64
fetch_and_add_64(volatile int64* ptr, int64 addend)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
  return faa64<int>((int64*)ptr, addend);
#elif defined(__ECC) //IA-64 version
  return _InterlockedExchangeAdd64((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
  // 64-bit interlocked intrinsics are unavailable on 32-bit Windows:
  // hard failure rather than a silent non-atomic update.
  _GLIBCXX_PARALLEL_ASSERT(false);
  return 0;
#else
  return _InterlockedExchangeAdd64(ptr, addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
  return __sync_fetch_and_add(ptr, addend);
#elif defined(__GNUC__) && defined(__i386) && \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
  // 64-bit __sync builtins need cmpxchg8b, i.e. i686 or newer.
  return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  // Emulate fetch-and-add with a compare-and-swap retry loop.
  volatile int64 before, after;
  do
    {
      before = *ptr;
      after = before + addend;
    } while (atomic_cas_64((volatile unsigned long long*)ptr, before,
                           after) != before);
  return before;
#else //fallback, slow
#if defined(__GNUC__) && defined(__i386)
  // NOTE(review): plain i386 lacks cmpxchg8b, so the __sync path above
  // is excluded and this slow path is taken — compile with -march=i686
  // or better to avoid it; confirm against the build flags.
#endif
#pragma message("slow fetch_and_add_64")
  // Fallback: serialize through an OpenMP critical section.
  int64 res;
#pragma omp critical
  {
    res = *ptr;
    *(ptr) += addend;
  }
  return res;
#endif
}
00176
00177
00178
00179
00180
00181
00182
00183 template<typename T>
00184 inline T
00185 fetch_and_add(volatile T* ptr, T addend)
00186 {
00187 if (sizeof(T) == sizeof(int32))
00188 return (T)fetch_and_add_32((volatile int32*) ptr, (int32)addend);
00189 else if (sizeof(T) == sizeof(int64))
00190 return (T)fetch_and_add_64((volatile int64*) ptr, (int64)addend);
00191 else
00192 _GLIBCXX_PARALLEL_ASSERT(false);
00193 }
00194
00195
#if defined(__ICC)

/** @brief 32-bit atomic compare-and-swap for the Intel compiler,
 *  implemented with x86 <tt>lock cmpxchgl</tt> inline assembly.
 *  @param ptr Pointer to the 32-bit value to modify.
 *  @param old Expected value.
 *  @param nw New value to store if @c *ptr equals @c old.
 *  @return Value of @c *ptr before the operation (equals @c old on
 *  success, since cmpxchg leaves the observed value in EAX).
 *  NOTE(review): the memory operand is cast to <tt>long long</tt> even
 *  though cmpxchgl touches only 32 bits — presumably harmless since
 *  "m" just supplies the address, but confirm. */
template<typename must_be_int = int>
inline int32
cas32(volatile int32* ptr, int32 old, int32 nw)
{
  int32 before;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a"(before)
                       : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
                       : "memory");
  return before;
}

#if defined(__x86_64)
/** @brief 64-bit atomic compare-and-swap for the Intel compiler on
 *  x86-64, implemented with <tt>lock cmpxchgq</tt> inline assembly.
 *  @param ptr Pointer to the 64-bit value to modify.
 *  @param old Expected value.
 *  @param nw New value to store if @c *ptr equals @c old.
 *  @return Value of @c *ptr before the operation. */
template<typename must_be_int = int>
inline int64
cas64(volatile int64 *ptr, int64 old, int64 nw)
{
  int64 before;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a"(before)
                       : "q"(nw), "m"(*(volatile long long*)(ptr)), "0"(old)
                       : "memory");
  return before;
}
#endif

#endif
00225
00226
00227
00228
00229
00230
00231
00232
00233
/** @brief Compare-and-swap on a 32-bit variable, atomically.
 *
 *  Selects the best primitive available for the compiler/platform:
 *  compiler intrinsics, GCC @c __sync builtins, Solaris/SPARC
 *  @c atomic_cas_32, or — as a last resort — an OpenMP critical
 *  section.
 *  @param ptr Pointer to a 32-bit signed integer.
 *  @param comparand Expected value.
 *  @param replacement Value stored if @c *ptr equals @c comparand.
 *  @return True iff the swap was performed. */
inline bool
compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
{
#if defined(__ICC) //x86 version
  return _InterlockedCompareExchange((void*)ptr, replacement,
                                     comparand) == comparand;
#elif defined(__ECC) //IA-64 version
  return _InterlockedCompareExchange((void*)ptr, replacement,
                                     comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
  return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr),
                                     replacement, comparand) == comparand;
#elif defined(__GNUC__)
  return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  return atomic_cas_32((volatile unsigned int*)ptr, comparand,
                       replacement) == comparand;
#else
#pragma message("slow compare_and_swap_32")
  // Fallback: serialize through an OpenMP critical section.
  bool res = false;
#pragma omp critical
  {
    if (*ptr == comparand)
      {
        *ptr = replacement;
        res = true;
      }
  }
  return res;
#endif
}
00265
00266
00267
00268
00269
00270
00271
00272
00273
/** @brief Compare-and-swap on a 64-bit variable, atomically.
 *
 *  Selects the best primitive available for the compiler/platform;
 *  mirrors compare_and_swap_32 but for 64-bit operands.
 *  @param ptr Pointer to a 64-bit signed integer.
 *  @param comparand Expected value.
 *  @param replacement Value stored if @c *ptr equals @c comparand.
 *  @return True iff the swap was performed. */
inline bool
compare_and_swap_64(volatile int64* ptr, int64 comparand, int64 replacement)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
  return cas64<int>(ptr, comparand, replacement) == comparand;
#elif defined(__ECC) //IA-64 version
  return _InterlockedCompareExchange64((void*)ptr, replacement,
                                       comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
  // 64-bit interlocked intrinsics are unavailable on 32-bit Windows.
  _GLIBCXX_PARALLEL_ASSERT(false);
  return 0;
#else
  return _InterlockedCompareExchange64(ptr, replacement,
                                       comparand) == comparand;
#endif

#elif defined(__GNUC__) && defined(__x86_64)
  return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__GNUC__) && defined(__i386) && \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
  // 64-bit __sync builtins need cmpxchg8b, i.e. i686 or newer.
  return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
  return atomic_cas_64((volatile unsigned long long*)ptr,
                       comparand, replacement) == comparand;
#else
#if defined(__GNUC__) && defined(__i386)
  // NOTE(review): plain i386 lacks cmpxchg8b; compile with -march=i686
  // or better to get the fast path above — confirm build flags.
#endif
#pragma message("slow compare_and_swap_64")
  // Fallback: serialize through an OpenMP critical section.
  bool res = false;
#pragma omp critical
  {
    if (*ptr == comparand)
      {
        *ptr = replacement;
        res = true;
      }
  }
  return res;
#endif
}
00317
00318
00319
00320
00321
00322
00323
00324
00325 template<typename T>
00326 inline bool
00327 compare_and_swap(volatile T* ptr, T comparand, T replacement)
00328 {
00329 if (sizeof(T) == sizeof(int32))
00330 return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement);
00331 else if (sizeof(T) == sizeof(int64))
00332 return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement);
00333 else
00334 _GLIBCXX_PARALLEL_ASSERT(false);
00335 }
00336
00337
00338
/** @brief Yield control of the processor: give up the remainder of the
 *  current time slice so that other (possibly waiting) threads may
 *  run. */
inline void
yield()
{
#if !defined (_WIN32) || defined (__CYGWIN__)
  // POSIX systems (and Cygwin): the standard scheduler yield call.
  sched_yield();
#else
  // Native Windows: Sleep(0) relinquishes the current time slice.
  Sleep(0);
#endif
}
00348 }
00349
00350 #endif