#include #include #include #include #include #include #include #include #define LOOP_ITERATIONS 10000000 #define TWO_THREADS() 2 #define USE_MUTEX() 1 #define THREAD_0_CPU 0 #define THREAD_1_CPU 1 #define THREAD_2_CPU 3 sem_t thread1sem; char buf1[80]; sem_t thread2sem; char buf2[80]; #if USE_MUTEX() pthread_mutex_t thread1mutex; #endif char buf3[80]; #if USE_MUTEX() pthread_mutex_t thread2mutex; #endif char buf4[80]; unsigned long counter1; char buf5[80]; unsigned long counter2; char buf6[80]; unsigned long long startTime1; char buf7[80]; unsigned long long endTime1; char buf8[80]; unsigned long long startTime2; char buf9[80]; unsigned long long endTime2; char buf10[80]; extern "C" long long timespec_to_nanosecond(struct timespec const *ts) { long long nanosecond = ts->tv_sec; nanosecond *= 1000000000; nanosecond += ts->tv_nsec; return nanosecond; } bool setThreadAffinity(int cpu) { unsigned num_elements = 0; int *rsizep, masksize_bytes, size; unsigned *rmaskp, *imaskp; void *my_data; if(cpu >= _syspage_ptr->num_cpu) { std::cerr << "Invalid cpu number" << std::endl; return false; } /* Determine the number of array elements required to hold * the runmasks, based on the number of CPUs in the system. */ num_elements = RMSK_SIZE(_syspage_ptr->num_cpu); /* Determine the size of the runmask, in bytes. */ masksize_bytes = num_elements * sizeof(unsigned); /* Allocate memory for the data structure that we'll pass * to ThreadCtl(). We need space for an integer (the number * of elements in each mask array) and the two masks * (runmask and inherit mask). */ size = sizeof(int) + 2 * masksize_bytes; if ((my_data = malloc(size)) == NULL) { std::cerr << "setThreadAffinity: not enough memory" << std::endl; return false; } else { memset(my_data, 0x00, size); /* Set up pointers to the "members" of the structure. */ rsizep = (int *)my_data; rmaskp = (unsigned *)(rsizep + 1); imaskp = rmaskp + num_elements; /* Set the size. */ *rsizep = num_elements; /* Set the runmask. Call this macro once for each processor the thread can run on. */ RMSK_SET(cpu, rmaskp); /* Set the inherit mask. Call this macro once for each processor the thread's children can run on. */ for(int i = 0; i < _syspage_ptr->num_cpu; ++i) { RMSK_SET(i, imaskp); } if ( ThreadCtl( _NTO_TCTL_RUNMASK_GET_AND_SET_INHERIT, my_data) == -1) { perror("_NTO_TCTL_RUNMASK_GET_AND_SET_INHERIT"); return false; } } return true; } void * thread1(void *arg) { struct timespec currentTime; if(!setThreadAffinity(THREAD_1_CPU)) { std::cerr << "Unable to set thread affinity for cpu " << THREAD_1_CPU << std::cerr; } sem_wait(&thread1sem); // Wait on signal from parent. sleep(1); // Sleep to make sure this thread doesn't // finish before the parent has a chance // to create the other thread(s) clock_gettime(CLOCK_REALTIME, ¤tTime); startTime1 = timespec_to_nanosecond(¤tTime); for(unsigned long long i = 0; i < LOOP_ITERATIONS; ++i) { #if USE_MUTEX() pthread_mutex_lock(&thread1mutex); #endif ++counter1; #if USE_MUTEX() pthread_mutex_unlock(&thread1mutex); #endif #if TWO_THREADS() == 0 #if USE_MUTEX() pthread_mutex_lock(&thread2mutex); #endif ++counter2; #if USE_MUTEX() pthread_mutex_unlock(&thread2mutex); #endif #endif } clock_gettime(CLOCK_REALTIME, ¤tTime); endTime1 = timespec_to_nanosecond(¤tTime); } #if TWO_THREADS() void * thread2(void *arg) { struct timespec currentTime; if(!setThreadAffinity(THREAD_2_CPU)) { std::cerr << "Unable to set thread affinity for cpu " << THREAD_2_CPU << std::cerr; } sem_wait(&thread2sem); // Wait on signal from parent. sleep(1); // Sleep to make sure this thread doesn't // finish before the parent has a chance // to create the other thread(s) clock_gettime(CLOCK_REALTIME, ¤tTime); startTime2 = timespec_to_nanosecond(¤tTime); for(unsigned long long i = 0; i < LOOP_ITERATIONS; ++i) { #if USE_MUTEX() pthread_mutex_lock(&thread2mutex); #endif ++counter2; #if USE_MUTEX() pthread_mutex_unlock(&thread2mutex); #endif } clock_gettime(CLOCK_REALTIME, ¤tTime); endTime2 = timespec_to_nanosecond(¤tTime); } #endif int main() { if(!setThreadAffinity(THREAD_0_CPU)) { std::cerr << "Unable to set thread affinity for cpu " << THREAD_1_CPU << std::cerr; } // code to use the cache spacing buffers so they don't get optimized away buf1[0] = 0x3E; buf2[0] = 0x3E; buf3[0] = 0x3E; buf4[0] = 0x3E; buf5[0] = 0x3E; buf6[0] = 0x3E; buf7[0] = 0x3E; buf8[0] = 0x3E; buf9[0] = 0x3E; buf10[0] = 0x3E; #if (USE_MUTEX() == 0) #if (TWO_THREADS() == 0) std::cout << "One Thread, no protection" << std::endl; #else std::cout << "Two Threads, no protection" << std::endl; #endif #else #if (TWO_THREADS() == 0) std::cout << "One Thread, mutexes" << std::endl; #else std::cout << "Two Threads, mutexes" << std::endl; #endif #endif #if TWO_THREADS() std::cout << "Main pinned to cpu " << THREAD_0_CPU << std::endl; std::cout << "Thread 1 pinned to cpu " << THREAD_1_CPU << std::endl; std::cout << "Thread 2 pinned to cpu " << THREAD_2_CPU << std::endl; #endif sem_init(&thread1sem,0,0); sem_init(&thread2sem,0,0); #if USE_MUTEX() pthread_mutex_init(&thread1mutex, 0); pthread_mutex_init(&thread2mutex, 0); #endif pthread_t tid1; pthread_attr_t attr1; pthread_attr_init(&attr1); struct sched_param param1; param1.sched_priority = sched_get_priority_max(SCHED_RR); pthread_create(&tid1, &attr1, &thread1, NULL); pthread_setschedparam(tid1, SCHED_RR, ¶m1); #if TWO_THREADS() pthread_t tid2; pthread_attr_t attr2; pthread_attr_init(&attr2); struct sched_param param2; param2.sched_priority = sched_get_priority_max(SCHED_RR); pthread_create(&tid2, &attr2, &thread2, NULL); pthread_setschedparam(tid2, SCHED_RR, ¶m2); #endif // timed section sem_post(&thread1sem); sem_post(&thread2sem); pthread_join(tid1, NULL); #if TWO_THREADS() pthread_join(tid2, NULL); #endif // end timed section // sleep(10); #if TWO_THREADS() printf("Thread 1 time: %20llu ns\n", (endTime1 - startTime1)); printf("Thread 2 time: %20llu ns\n", (endTime2 - startTime2)); unsigned long long startTime = std::min(startTime1, startTime2); unsigned long long endTime = std::max(endTime1, endTime2); #else unsigned long long startTime = startTime1; unsigned long long endTime = endTime1; #endif printf("Total time: %20llu ns\n", (endTime - startTime)); return 0; }