#include #include static int _Tnum_timers; static CUevent *_Tstart_events, *_Tstop_events; static CUstream _Tstream; void init_timers (int ntimers) { int i; CUresult r; _Tnum_timers = ntimers; _Tstart_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); _Tstop_events = (CUevent *) malloc (_Tnum_timers * sizeof (CUevent)); r = cuStreamCreate (&_Tstream, CU_STREAM_DEFAULT); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuStreamCreate failed: %d\n", r); abort (); } for (i = 0; i < _Tnum_timers; i++) { r = cuEventCreate (&_Tstart_events[i], CU_EVENT_DEFAULT); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuEventCreate failed: %d\n", r); abort (); } r = cuEventCreate (&_Tstop_events[i], CU_EVENT_DEFAULT); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuEventCreate failed: %d\n", r); abort (); } } } void fini_timers (void) { int i; for (i = 0; i < _Tnum_timers; i++) { cuEventDestroy (_Tstart_events[i]); cuEventDestroy (_Tstop_events[i]); } cuStreamDestroy (_Tstream); free (_Tstart_events); free (_Tstop_events); } void start_timer (int timer) { CUresult r; r = cuEventRecord (_Tstart_events[timer], _Tstream); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuEventRecord failed: %d\n", r); abort (); } } float stop_timer (int timer) { CUresult r; float etime; r = cuEventRecord (_Tstop_events[timer], _Tstream); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuEventRecord failed: %d\n", r); abort (); } r = cuEventSynchronize (_Tstop_events[timer]); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuEventSynchronize failed: %d\n", r); abort (); } r = cuEventElapsedTime (&etime, _Tstart_events[timer], _Tstop_events[timer]); if (r != CUDA_SUCCESS) { fprintf (stderr, "cuEventElapsedTime failed: %d\n", r); abort (); } return etime; }