-
cudaGetDeviceCount:获取GPU设备数量;
cudaError_t cudaGetDeviceCount(int *count)
- count:保存设备数量的指针;
-
cudaDeviceProp:设备属性结构体[C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.1\include\driver_types.h];
struct __device_builtin__ cudaDeviceProp { char name[256]; < ASCII string identifying device>
cudaUUID_t uuid; < 16-byte unique identifier>
char luid[8]; < 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms>
unsigned int luidDeviceNodeMask; < LUID device node mask. Value is undefined on TCC and non-Windows platforms>
size_t totalGlobalMem; < Global memory available on device in bytes >
size_t sharedMemPerBlock; < Shared memory available per block in bytes>
int regsPerBlock; < 32-bit registers available per block>
int warpSize; < Warp size in threads >
size_t memPitch; < Maximum pitch in bytes allowed by memory copies >
int maxThreadsPerBlock; < Maximum number of threads per block >
int maxThreadsDim[3]; < Maximum size of each dimension of a block >
int maxGridSize[3]; < Maximum size of each dimension of a grid >
int clockRate; < Clock frequency in kilohertz >
size_t totalConstMem; < Constant memory available on device in bytes >
int major; < Major compute capability >
int minor; < Minor compute capability >
size_t textureAlignment; < Alignment requirement for textures >
size_t texturePitchAlignment; < Pitch alignment requirement for texture references bound to pitched memory >
int deviceOverlap; < Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. >
int multiProcessorCount; < Number of multiprocessors on device >
int kernelExecTimeoutEnabled; < Specified whether there is a run time limit on kernels >
int integrated; < Device is integrated as opposed to discrete >
int canMapHostMemory; < Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer >
int computeMode; < Compute mode (See ::cudaComputeMode) >
int maxTexture1D; < Maximum 1D texture size >
int maxTexture1DMipmap; < Maximum 1D mipmapped texture size >
int maxTexture1DLinear; < Maximum size for 1D textures bound to linear memory >
int maxTexture2D[2]; < Maximum 2D texture dimensions >
int maxTexture2DMipmap[2]; < Maximum 2D mipmapped texture dimensions >
int maxTexture2DLinear[3]; < Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory >
int maxTexture2DGather[2]; < Maximum 2D texture dimensions if texture gather operations have to be performed >
int maxTexture3D[3]; < Maximum 3D texture dimensions >
int maxTexture3DAlt[3]; < Maximum alternate 3D texture dimensions >
int maxTextureCubemap; < Maximum Cubemap texture dimensions >
int maxTexture1DLayered[2]; < Maximum 1D layered texture dimensions >
int maxTexture2DLayered[3]; < Maximum 2D layered texture dimensions >
int maxTextureCubemapLayered[2]; < Maximum Cubemap layered texture dimensions >
int maxSurface1D; < Maximum 1D surface size >
int maxSurface2D[2]; < Maximum 2D surface dimensions >
int maxSurface3D[3]; < Maximum 3D surface dimensions >
int maxSurface1DLayered[2]; < Maximum 1D layered surface dimensions >
int maxSurface2DLayered[3]; < Maximum 2D layered surface dimensions >
int maxSurfaceCubemap; < Maximum Cubemap surface dimensions >
int maxSurfaceCubemapLayered[2]; < Maximum Cubemap layered surface dimensions >
size_t surfaceAlignment; < Alignment requirements for surfaces >
int concurrentKernels; < Device can possibly execute multiple kernels concurrently >
int ECCEnabled; < Device has ECC support enabled >
int pciBusID; < PCI bus ID of the device >
int pciDeviceID; < PCI device ID of the device >
int pciDomainID; < PCI domain ID of the device >
int tccDriver; < 1 if device is a Tesla device using TCC driver, 0 otherwise >
int asyncEngineCount; < Number of asynchronous engines >
int unifiedAddressing; < Device shares a unified address space with the host >
int memoryClockRate; < Peak memory clock frequency in kilohertz >
int memoryBusWidth; < Global memory bus width in bits >
int l2CacheSize; < Size of L2 cache in bytes >
int persistingL2CacheMaxSize; < Device's maximum l2 persisting lines capacity setting in bytes >
int maxThreadsPerMultiProcessor; < Maximum resident threads per multiprocessor >
int streamPrioritiesSupported; < Device supports stream priorities >
int globalL1CacheSupported; < Device supports caching globals in L1 >
int localL1CacheSupported; < Device supports caching locals in L1 >
size_t sharedMemPerMultiprocessor; < Shared memory available per multiprocessor in bytes >
int regsPerMultiprocessor; < 32-bit registers available per multiprocessor >
int managedMemory; < Device supports allocating managed memory on this system >
int isMultiGpuBoard; < Device is on a multi-GPU board >
int multiGpuBoardGroupID; < Unique identifier for a group of devices on the same multi-GPU board >
int hostNativeAtomicSupported; < Link between the device and the host supports native atomic operations >
int singleToDoublePrecisionPerfRatio; < Ratio of single precision performance (in floating-point operations per second) to double precision performance >
int pageableMemoryAccess; < Device supports coherently accessing pageable memory without calling cudaHostRegister on it >
int concurrentManagedAccess; < Device can coherently access managed memory concurrently with the CPU >
int computePreemptionSupported; < Device supports Compute Preemption >
int canUseHostPointerForRegisteredMem; < Device can access host registered memory at the same virtual address as the CPU >
int cooperativeLaunch; < Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel >
int cooperativeMultiDeviceLaunch; < Device can participate in cooperative kernels launched via ::cudaLaunchCooperativeKernelMultiDevice >
size_t sharedMemPerBlockOptin; < Per device maximum shared memory per block usable by special opt in >
int pageableMemoryAccessUsesHostPageTables; < Device accesses pageable memory via the host's page tables >
int directManagedMemAccessFromHost; < Host can directly access managed memory on the device without migration. >
int maxBlocksPerMultiProcessor; < Maximum number of resident blocks per multiprocessor >
int accessPolicyMaxWindowSize; < The maximum value of ::cudaAccessPolicyWindow::num_bytes. >
size_t reservedSharedMemPerBlock; < Shared memory reserved by CUDA driver per block in bytes >
};
-
cudaSetDevice:使用某个设备;
Get_CUDA_Info
Folders and files
Name | Name | Last commit date | ||
---|---|---|---|---|
parent directory.. | ||||