12 #ifndef _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
13 #define _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_
// Call-site wrappers: each macro expands to the matching __cu* helper and
// appends __FILE__ and __LINE__, so callers do not pass the location by hand.
// Check a CUresult via __cuSafeCallNoSync.
22 #define cutilDrvSafeCallNoSync(err) __cuSafeCallNoSync (err, __FILE__, __LINE__)
// Check a CUresult via __cuSafeCall (which forwards to __cuSafeCallNoSync).
23 #define cutilDrvSafeCall(err) __cuSafeCall (err, __FILE__, __LINE__)
// Run __cuCtxSync, which calls cuCtxSynchronize() and reports failures.
24 #define cutilDrvCtxSync() __cuCtxSync (__FILE__, __LINE__)
// Run __cuCheckMsg with a caller-supplied message that is printed on failure.
25 #define cutilDrvCheckMsg(msg) __cuCheckMsg (msg, __FILE__, __LINE__)
// Rounds `offset` up to the next multiple of `alignment` and stores the result
// back into `offset` (which must therefore be a modifiable lvalue). The mask
// arithmetic is only correct when `alignment` is a power of two.
// Every use of a macro argument is parenthesized so that expression arguments
// (e.g. `8 << 1` or `a | b`) group as the caller intended.
// Both arguments are expanded more than once; avoid side effects in them.
#define cutilDrvAlignOffset(offset, alignment) ( (offset) = ((offset) + ((alignment)-1)) & ~((alignment)-1) )
// Reports a failed driver-API call. Normally reached through the
// cutilDrvSafeCallNoSync macro, which supplies the call site.
//   err  - CUresult of the call being checked
//   file - source file of the call site
//   line - line number of the call site
// NOTE(review): the fprintf arguments and whatever follows the message are
// not visible in this excerpt.
29 inline void __cuSafeCallNoSync( CUresult err,
const char *file,
const int line )
// Any result other than CUDA_SUCCESS is logged to stderr.
31 if( CUDA_SUCCESS != err) {
32 fprintf(stderr,
"cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
// Checks a driver-API result by forwarding all three arguments unchanged to
// __cuSafeCallNoSync.
//   err  - CUresult of the call being checked
//   file - source file of the call site
//   line - line number of the call site
37 inline void __cuSafeCall( CUresult err,
const char *file,
const int line )
39 __cuSafeCallNoSync( err, file, line );
// Calls cuCtxSynchronize() and logs a failure to stderr with the call site.
//   file - source file of the call site
//   line - line number of the call site
// NOTE(review): the fprintf arguments and the rest of the error path are not
// visible in this excerpt.
42 inline void __cuCtxSync(
const char *file,
const int line )
44 CUresult err = cuCtxSynchronize();
// Any result other than CUDA_SUCCESS is treated as an error.
45 if( CUDA_SUCCESS != err ) {
46 fprintf(stderr,
"cuCtxSynchronize() API error = %04d in file <%s>, line %i.\n",
// Minimum / maximum of two values.
// Arguments are fully parenthesized so operands containing lower-precedence
// operators (e.g. `x | y`, `c ? p : q`) are compared and returned as a whole.
// Each argument may be evaluated twice; do not pass expressions with side
// effects.
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
// Looks up the core count per multiprocessor for a compute capability.
//   major, minor - compute-capability version; combined into the lookup key
//                  (major << 4) + minor.
// Returns the Cores field of the matching table row.
// NOTE(review): the table initializer, the index bookkeeping and the value
// returned after the "undefined SMversion" message are not visible in this
// excerpt.
56 inline int _ConvertSMVer2CoresDrvApi(
int major,
int minor)
// Table of rows with SM and Cores fields.
64 sSMtoCores nGpuArchCoresPerSM[] =
// Scan rows until the sentinel row whose SM field is -1.
75 while (nGpuArchCoresPerSM[index].SM != -1) {
76 if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) {
77 return nGpuArchCoresPerSM[index].Cores;
// Reached when no row matched the requested version.
81 printf(
"MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
// Chooses the device with the highest estimated compute score and returns
// its index (max_perf_device, which starts at 0).
// NOTE(review): loop bookkeeping (incrementing / resetting current_device),
// closing braces and some continuation lines are not visible in this excerpt.
87 inline int cutilDrvGetMaxGflopsDeviceId()
89 CUdevice current_device = 0, max_perf_device = 0;
90 int device_count = 0, sm_per_multiproc = 0;
91 int max_compute_perf = 0, best_SM_arch = 0;
92 int major = 0, minor = 0, multiProcessorCount, clockRate;
// Ask the driver how many devices exist.
95 cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
// First pass: track the largest major compute-capability version, ignoring
// values that are not in the open range (0, 9999).
98 while ( current_device < device_count ) {
99 cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
100 if (major > 0 && major < 9999) {
101 best_SM_arch = MAX(best_SM_arch, major);
// Second pass: score every device.
108 while( current_device < device_count ) {
109 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
110 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
112 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
113 CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
115 cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
// Version 9999.9999 is special-cased to one core per multiprocessor; any
// other version goes through the lookup helper.
117 if (major == 9999 && minor == 9999) {
118 sm_per_multiproc = 1;
120 sm_per_multiproc = _ConvertSMVer2CoresDrvApi(major, minor);
// Score = multiprocessor count * cores per multiprocessor * clock rate.
// NOTE(review): the product is computed in int and could overflow for large
// devices; also the lookup helper prints a message when the version is
// unknown, and its return value in that case is not visible here - confirm.
123 int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
124 if( compute_perf > max_compute_perf ) {
// When the best major version seen is above 2, only devices with that major
// version may become the winner.
126 if ( best_SM_arch > 2 ) {
128 if (major == best_SM_arch) {
129 max_compute_perf = compute_perf;
130 max_perf_device = current_device;
// Presumably the else branch of the best_SM_arch test (the else line itself
// is not visible): accept the device without the major-version filter.
133 max_compute_perf = compute_perf;
134 max_perf_device = current_device;
139 return max_perf_device;
// Variant of the max-score device search that also determines a per-device
// TCC flag (bTCC). Returns max_perf_device, which starts at 0.
// NOTE(review): the declaration of bTCC, every use of bTCC after it is set,
// the #else/#endif lines, loop bookkeeping and closing braces are not visible
// in this excerpt.
143 inline int cutilDrvGetMaxGflopsGraphicsDeviceId()
145 CUdevice current_device = 0, max_perf_device = 0;
146 int device_count = 0, sm_per_multiproc = 0;
147 int max_compute_perf = 0, best_SM_arch = 0;
148 int major = 0, minor = 0, multiProcessorCount, clockRate;
150 char deviceName[256];
// Ask the driver how many devices exist.
153 cutilDrvSafeCallNoSync(cuDeviceGetCount(&device_count));
// First pass: fetch name and compute capability, set the TCC flag, and track
// the largest major version in the open range (0, 9999).
156 while ( current_device < device_count ) {
157 cutilDrvSafeCallNoSync( cuDeviceGetName(deviceName, 256, current_device) );
158 cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
// With CUDA_VERSION >= 3020 the flag comes from the driver attribute; the
// name test below (first character 'T') is presumably the fallback branch.
160 #if CUDA_VERSION >= 3020
161 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device ) );
164 if (deviceName[0] ==
'T') bTCC = 1;
167 if (major > 0 && major < 9999) {
168 best_SM_arch = MAX(best_SM_arch, major);
// Second pass: score every device.
176 while( current_device < device_count ) {
177 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &multiProcessorCount,
178 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
180 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &clockRate,
181 CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
183 cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, current_device ) );
// NOTE(review): no cuDeviceGetName call is visible in this second loop, so
// the name-based test below may be looking at the name left over from the
// first loop - confirm against the full file.
185 #if CUDA_VERSION >= 3020
186 cutilDrvSafeCallNoSync( cuDeviceGetAttribute( &bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device ) );
189 if (deviceName[0] ==
'T') bTCC = 1;
// Version 9999.9999 is special-cased to one core per multiprocessor; any
// other version goes through the lookup helper.
192 if (major == 9999 && minor == 9999) {
193 sm_per_multiproc = 1;
195 sm_per_multiproc = _ConvertSMVer2CoresDrvApi(major, minor);
// Score = multiprocessor count * cores per multiprocessor * clock rate.
// NOTE(review): computed in int; could overflow for large devices - confirm.
201 int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
202 if( compute_perf > max_compute_perf ) {
// When the best major version seen is above 2, only devices with that major
// version may become the winner.
204 if ( best_SM_arch > 2 ) {
206 if (major == best_SM_arch) {
207 max_compute_perf = compute_perf;
208 max_perf_device = current_device;
// Presumably the else branch of the best_SM_arch test (else line not
// visible): accept the device without the major-version filter.
211 max_compute_perf = compute_perf;
212 max_perf_device = current_device;
218 return max_perf_device;
// Calls cuCtxSynchronize() and, on failure, prints the caller's message and
// then a second line whose format carries the error code and call site.
//   msg  - caller-supplied text printed first on failure
//   file - source file of the call site
//   line - line number of the call site
// NOTE(review): the arguments of the second fprintf and the rest of the error
// path are not visible in this excerpt.
221 inline void __cuCheckMsg(
const char * msg,
const char *file,
const int line )
223 CUresult err = cuCtxSynchronize();
// Any result other than CUDA_SUCCESS is treated as an error.
224 if( CUDA_SUCCESS != err) {
225 fprintf(stderr,
"cutilDrvCheckMsg -> %s", msg);
226 fprintf(stderr,
"cutilDrvCheckMsg -> cuCtxSynchronize API error = %04d in file <%s>, line %i.\n",
233 #if __DEVICE_EMULATION__
// Device-emulation stand-in for cutilDeviceInitDrv: performs no driver calls.
//   ARGC, ARGV - program command line (unused here).
// Returns 0.
// The previous empty body let control fall off the end of a function declared
// to return int, which is undefined behavior in C++; callers that assign the
// result (e.g. to a device index) now receive a defined value.
inline int cutilDeviceInitDrv(int ARGC, char **ARGV)
{
    // Silence unused-parameter warnings; the signature must match the
    // non-emulation version.
    (void)ARGC;
    (void)ARGV;
    return 0;
}
// Initializes the driver API and selects the device requested with the
// "device" command-line argument.
//   ARGC, ARGV - program command line; searched for "device" and "quiet".
// NOTE(review): the declarations of deviceCount, dev, cuDevice and name, the
// exits on the error paths and the return statement are not visible in this
// excerpt.
236 inline int cutilDeviceInitDrv(
int ARGC,
char ** ARGV)
240 CUresult err = cuInit(0);
// The device count is only queried when cuInit succeeded.
241 if (CUDA_SUCCESS == err)
242 cutilDrvSafeCallNoSync(cuDeviceGetCount(&deviceCount));
243 if (deviceCount == 0) {
244 fprintf(stderr,
"CUTIL DeviceInitDrv error: no devices supporting CUDA\n");
// Read the requested device index from the command line.
248 cutGetCmdLineArgumenti(ARGC, (
const char **) ARGV,
"device", &dev);
// A negative index falls back to device 0.
249 if (dev < 0) dev = 0;
// An index past the last device is reported as invalid.
250 if (dev > deviceCount-1) {
251 fprintf(stderr,
"\n");
252 fprintf(stderr,
">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
253 fprintf(stderr,
">> cutilDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
254 fprintf(stderr,
"\n");
257 cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, dev));
// The result of cuDeviceGetName is not checked here.
259 cuDeviceGetName(name, 100, cuDevice);
// The device banner is printed only when the "quiet" flag is absent.
260 if (cutCheckCmdLineFlag(ARGC, (
const char **) ARGV,
"quiet") == CUTFalse) {
261 printf(
"> Using CUDA Device [%d]: %s\n", dev, name);
268 #if __DEVICE_EMULATION__
// Variant of cutilChooseCudaDeviceDrv that follows the
// `#if __DEVICE_EMULATION__` directive directly above.
//   argc, argv - program command line
//   p_devID    - pointer parameter for a device index
// NOTE(review): only the signature is visible in this excerpt; the body is
// not shown.
269 inline CUdevice cutilChooseCudaDeviceDrv(
int argc,
char **argv,
int *p_devID)
// Picks the CUDA device to use, either from the command line or by score.
//   argc, argv - program command line; checked for the "device" flag
//   p_devID    - optional out-parameter; receives the chosen device index
//                when non-null
// NOTE(review): the declarations of devID, cuDevice and name, the else line
// between the two paths, the condition guarding "exiting..." and the return
// statement are not visible in this excerpt.
271 inline CUdevice cutilChooseCudaDeviceDrv(
int argc,
char **argv,
int *p_devID)
// If "device" was given on the command line, let cutilDeviceInitDrv choose.
276 if( cutCheckCmdLineFlag(argc, (
const char**)argv,
"device") ) {
277 devID = cutilDeviceInitDrv(argc, argv);
279 printf(
"exiting...\n");
// Presumably the path taken without a "device" flag: use the highest-scoring
// device and announce it.
285 devID = cutilDrvGetMaxGflopsDeviceId();
286 cutilDrvSafeCallNoSync(cuDeviceGet(&cuDevice, devID));
287 cuDeviceGetName(name, 100, cuDevice);
288 printf(
"> Using CUDA Device [%d]: %s\n", devID, name);
// The result of this cuDeviceGet call is not checked.
290 cuDeviceGet(&cuDevice, devID);
291 if (p_devID) *p_devID = devID;
// Synchronizes the context twice and prints the same diagnostic for two
// different conditions.
//   errorMessage - text placed in the diagnostic
//   file, line   - call site placed in the diagnostic
// NOTE(review): whatever follows each fprintf (for example an exit) is not
// visible in this excerpt.
298 inline void cutilDrvCudaCheckCtxLost(
const char *errorMessage,
const char *file,
const int line )
300 CUresult err = cuCtxSynchronize();
// First check: report when the result is anything other than
// CUDA_ERROR_INVALID_CONTEXT.
301 if( CUDA_ERROR_INVALID_CONTEXT != err) {
302 fprintf(stderr,
"Cuda error: %s in file '%s' in line %i\n",
303 errorMessage, file, line );
// Second check: synchronize again and report any result other than
// CUDA_SUCCESS.
306 err = cuCtxSynchronize();
307 if( CUDA_SUCCESS != err) {
308 fprintf(stderr,
"Cuda error: %s in file '%s' in line %i\n",
309 errorMessage, file, line );
// Aliases for case-insensitive string comparison. Each name is defined twice;
// presumably a platform conditional (its #if/#else/#endif lines are not
// visible in this excerpt) selects one definition per build.
316 #define STRCASECMP _stricmp
318 #define STRCASECMP strcasecmp
// Length-limited counterparts of the aliases above.
324 #define STRNCASECMP _strnicmp
326 #define STRNCASECMP strncasecmp
// Prints the final test-result line for the program.
//   argc, argv - program command line; scanned for "-qatest" / "-noprompt"
//   bStatus    - result flag, used as an index into sStatus
// NOTE(review): what the option scan records, the condition that selects
// between the two output formats, and anything after the final printf are not
// visible in this excerpt.
330 inline void __cutilDrvQAFinish(
int argc,
char **argv,
bool bStatus)
// Indexed by bStatus: false selects "FAILED", true selects "PASSED". A bool
// index cannot reach the "WAIVED" entry.
332 const char *sStatus[] = {
"FAILED",
"PASSED",
"WAIVED", NULL };
// Case-insensitive scan of the arguments (argv[0] is skipped).
335 for (
int i=1; i < argc; i++) {
336 if (!STRCASECMP(argv[i],
"-qatest") || !STRCASECMP(argv[i],
"-noprompt")) {
// Output form 1: "&&&&" marker, status, program name, then every argument.
342 printf(
"&&&& %s %s", sStatus[bStatus], argv[0]);
343 for (
int i=1; i < argc; i++) printf(
" %s", argv[i]);
// Output form 2: program name in brackets followed by the status on its own
// line.
345 printf(
"[%s] test result\n%s\n", argv[0], sStatus[bStatus]);
// Checks whether device `deviceNum` has at least the requested compute
// capability.
//   major_version, minor_version - minimum compute capability required
//   deviceNum                    - ordinal passed to cuDeviceGet
//   argc, argv                   - forwarded to __cutilDrvQAFinish on failure
// NOTE(review): the return statements, the else line and the closing of the
// #ifdef are not visible in this excerpt.
350 inline bool cutilDrvCudaDevCapabilities(
int major_version,
int minor_version,
int deviceNum,
int argc,
char** argv)
352 int major, minor, dev;
353 char device_name[256];
355 #ifdef __DEVICE_EMULATION__
356 printf(
"> Compute Device Emulation Mode \n");
// Fetch the device handle, its compute capability and its name.
359 cutilDrvSafeCallNoSync( cuDeviceGet(&dev, deviceNum) );
360 cutilDrvSafeCallNoSync( cuDeviceComputeCapability(&major, &minor, dev));
361 cutilDrvSafeCallNoSync( cuDeviceGetName(device_name, 256, dev) );
// Accept a higher major version, or the same major with minor >= requested.
363 if((major > major_version) ||
364 (major == major_version && minor >= minor_version))
366 printf(
"> Device %d: < %s >, Compute SM %d.%d detected\n", dev, device_name, major, minor);
// Presumably the failure branch: the capability requirement was not met.
// NOTE(review): the status passed below is true, which __cutilDrvQAFinish
// maps to "PASSED" - presumably intentional, confirm.
371 printf(
"There is no device supporting CUDA compute capability %d.%d.\n", major_version, minor_version);
372 __cutilDrvQAFinish(argc, argv,
true);
// Convenience form of cutilDrvCudaDevCapabilities that always checks device
// ordinal 0.
//   major_version, minor_version - minimum compute capability required
//   argc, argv                   - forwarded unchanged
// Returns whatever cutilDrvCudaDevCapabilities returns.
378 inline bool cutilDrvCudaCapabilities(
int major_version,
int minor_version,
int argc,
char **argv)
380 return cutilDrvCudaDevCapabilities(major_version, minor_version, 0, argc, argv);
384 #endif // _CUTIL_INLINE_FUNCTIONS_DRVAPI_H_