Changeset View
Changeset View
Standalone View
Standalone View
intern/cycles/kernel/device/hip/config.h
| Show All 29 Lines | |||||
| #define GPU_BLOCK_MAX_THREADS 1024 | #define GPU_BLOCK_MAX_THREADS 1024 | ||||
| #define GPU_THREAD_MAX_REGISTERS 255 | #define GPU_THREAD_MAX_REGISTERS 255 | ||||
| #define GPU_KERNEL_BLOCK_NUM_THREADS 1024 | #define GPU_KERNEL_BLOCK_NUM_THREADS 1024 | ||||
| #define GPU_KERNEL_MAX_REGISTERS 64 | #define GPU_KERNEL_MAX_REGISTERS 64 | ||||
| /* Compute number of threads per block and minimum blocks per multiprocessor | /* Compute number of threads per block and minimum blocks per multiprocessor | ||||
| * given the maximum number of registers per thread. */ | * given the maximum number of registers per thread. */ | ||||
| #define ccl_gpu_kernel(block_num_threads, thread_num_registers) \ | |||||
| #define ccl_gpu_kernel_threads(block_num_threads) \ | |||||
| extern "C" __global__ void __launch_bounds__(block_num_threads) | |||||
| #define ccl_gpu_kernel_threads_registers(block_num_threads, thread_num_registers) \ | |||||
| extern "C" __global__ void __launch_bounds__(block_num_threads, \ | extern "C" __global__ void __launch_bounds__(block_num_threads, \ | ||||
| GPU_MULTIPRESSOR_MAX_REGISTERS / \ | GPU_MULTIPRESSOR_MAX_REGISTERS / \ | ||||
| (block_num_threads * thread_num_registers)) | (block_num_threads * thread_num_registers)) | ||||
| /* allow ccl_gpu_kernel to accept 1 or 2 parameters */ | #define ccl_gpu_kernel_threads(block_num_threads) \ | ||||
| #define SELECT_MACRO(_1, _2, NAME, ...) NAME | extern "C" __global__ void __launch_bounds__(block_num_threads) | ||||
| #define ccl_gpu_kernel(...) \ | |||||
| SELECT_MACRO(__VA_ARGS__, ccl_gpu_kernel_threads_registers, ccl_gpu_kernel_threads)(__VA_ARGS__) | |||||
| #define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__) | #define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__) | ||||
| #define ccl_gpu_kernel_call(x) x | #define ccl_gpu_kernel_call(x) x | ||||
| /* define a function object where "func" is the lambda body, and additional parameters are used to | /* Define a function object where "func" is the lambda body, and additional parameters are used to | ||||
| * specify captured state */ | * specify captured state */ | ||||
| #define ccl_gpu_kernel_lambda(func, ...) \ | #define ccl_gpu_kernel_lambda(func, ...) \ | ||||
| struct KernelLambda { \ | struct KernelLambda { \ | ||||
| __VA_ARGS__; \ | __VA_ARGS__; \ | ||||
| __device__ int operator()(const int state) \ | __device__ int operator()(const int state) \ | ||||
| { \ | { \ | ||||
| return (func); \ | return (func); \ | ||||
| } \ | } \ | ||||
| } ccl_gpu_kernel_lambda_pass; \ | } ccl_gpu_kernel_lambda_pass | ||||
| ccl_gpu_kernel_lambda_pass | |||||
| /* sanity checks */ | /* sanity checks */ | ||||
| #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS | #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS | ||||
| # error "Maximum number of threads per block exceeded" | # error "Maximum number of threads per block exceeded" | ||||
| #endif | #endif | ||||
| #if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \ | #if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \ | ||||
| GPU_MULTIPROCESSOR_MAX_BLOCKS | GPU_MULTIPROCESSOR_MAX_BLOCKS | ||||
| # error "Maximum number of blocks per multiprocessor exceeded" | # error "Maximum number of blocks per multiprocessor exceeded" | ||||
| #endif | #endif | ||||
| #if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS | #if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS | ||||
| # error "Maximum number of registers per thread exceeded" | # error "Maximum number of registers per thread exceeded" | ||||
| #endif | #endif | ||||