17 #ifndef __TBB_flow_graph_opencl_node_H
18 #define __TBB_flow_graph_opencl_node_H
20 #define __TBB_flow_graph_opencl_node_H_include_area
24 #if __TBB_PREVIEW_OPENCL_NODE
37 #include <OpenCL/opencl.h>
45 namespace interface11 {
47 template <
typename DeviceFilter>
50 namespace opencl_info {
54 template <
typename Factory>
58 if (err != CL_SUCCESS) {
59 std::cerr << msg <<
"; error code: " << err << std::endl;
67 enforce_cl_retcode(clGetEventInfo(e, i,
sizeof(res), &res, NULL),
"Failed to get OpenCL event information");
74 enforce_cl_retcode(clGetDeviceInfo(
d, i,
sizeof(res), &res, NULL),
"Failed to get OpenCL device information");
79 inline std::string device_info<std::string>(cl_device_id
d, cl_device_info i) {
81 enforce_cl_retcode(clGetDeviceInfo(
d, i, 0, NULL, &required),
"Failed to get OpenCL device information");
83 char *buff = (
char*)alloca(required);
84 enforce_cl_retcode(clGetDeviceInfo(
d, i, required, buff, NULL),
"Failed to get OpenCL device information");
92 enforce_cl_retcode(clGetPlatformInfo(
p, i,
sizeof(res), &res, NULL),
"Failed to get OpenCL platform information");
97 inline std::string platform_info<std::string>(cl_platform_id
p, cl_platform_info i) {
99 enforce_cl_retcode(clGetPlatformInfo(
p, i, 0, NULL, &required),
"Failed to get OpenCL platform information");
101 char *buff = (
char*)alloca(required);
102 enforce_cl_retcode(clGetPlatformInfo(
p, i, required, buff, NULL),
"Failed to get OpenCL platform information");
123 return platform_info<std::string>(
platform_id(), CL_PLATFORM_PROFILE );
126 return platform_info<std::string>(
platform_id(), CL_PLATFORM_VERSION );
129 return platform_info<std::string>(
platform_id(), CL_PLATFORM_NAME );
132 return platform_info<std::string>(
platform_id(), CL_PLATFORM_VENDOR );
135 return platform_info<std::string>(
platform_id(), CL_PLATFORM_EXTENSIONS );
138 template <
typename T>
139 void info( cl_device_info i, T &t )
const {
148 std::sscanf(
version().c_str(),
"OpenCL %d", &major );
153 std::sscanf(
version().c_str(),
"OpenCL %d.%d", &major, &minor );
159 return (device_info<cl_command_queue_properties>(
my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
162 return (device_info<cl_command_queue_properties>(
my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
167 return (device_info<cl_command_queue_properties>(
my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
173 return device_info<std::array<size_t, 3>>(
my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
176 return device_info<size_t>(
my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
// Whole-entry match: both the list returned by built_in_kernels() and the
// requested name k are wrapped in the ";" delimiter before find(), so a name
// that is only a substring of a longer entry does not match.
// (k is declared outside this excerpt.)
179 const std::string semi =
";";
181 return (semi +
built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
184 return device_info<std::string>(
my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
193 return device_info<cl_bool>(
my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
196 return device_info<cl_bool>(
my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
// Whole-entry match: both the string returned by extensions() and the
// requested name ext are wrapped in a single space before find(), so a name
// that is only a substring of a longer entry does not match.
// (ext is declared outside this excerpt.)
199 const std::string space =
" ";
201 return (space +
extensions() + space).find( space + ext + space ) != std::string::npos;
204 return device_info<std::string>(
my_cl_device_id, CL_DEVICE_EXTENSIONS );
216 return device_info<cl_uint>(
my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
232 return device_info<cl_platform_id>(
my_cl_device_id, CL_DEVICE_PLATFORM );
243 template <
typename DeviceFilter>
245 template <
typename Factory>
247 template <
typename Factory>
251 template <
typename T,
typename Factory>
// Enumerate OpenCL platforms with two calls: the first only asks for the
// platform count, the second fills a vector of that size.
286 cl_uint num_platforms;
287 enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms),
"clGetPlatformIDs failed");
289 std::vector<cl_platform_id> platforms(num_platforms);
290 enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL),
"clGetPlatformIDs failed");
// Walk the platforms and total their device counts in num_all_devices.
// (num_devices is declared outside this excerpt.)
293 std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
294 cl_uint num_all_devices = 0;
295 while (platforms_it != platforms.end()) {
296 cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
// A platform that reports CL_DEVICE_NOT_FOUND is erased from the vector;
// erase() hands back the iterator to continue from.
297 if (err == CL_DEVICE_NOT_FOUND) {
298 platforms_it = platforms.erase(platforms_it);
302 num_all_devices += num_devices;
// One flat vector sized for the devices of all remaining platforms.
307 std::vector<cl_device_id> devices(num_all_devices);
308 std::vector<cl_device_id>::iterator devices_it = devices.begin();
309 for (
auto p = platforms.begin();
p != platforms.end(); ++
p) {
// Each call writes starting at devices_it, is capped by the space left up to
// devices.end(), and devices_it then advances by the count reported back.
310 enforce_cl_retcode(clGetDeviceIDs((*
p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices),
"clGetDeviceIDs failed");
311 devices_it += num_devices;
314 for (
auto d = devices.begin();
d != devices.end(); ++
d) {
318 return opencl_devices;
324 namespace opencl_info {
337 virtual void call() = 0;
341 template <
typename Callback,
typename T>
353 template <
typename T,
typename Factory = opencl_info::default_opencl_factory>
380 const T&
data(
bool wait =
true )
const {
401 dmsg.my_is_event =
false;
431 cl_command_queue cq = event_info<cl_command_queue>(
my_event, CL_EVENT_COMMAND_QUEUE );
432 if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
443 enforce_cl_retcode( clFlush( event_info<cl_command_queue>(
my_event, CL_EVENT_COMMAND_QUEUE ) ),
"Failed to flush an OpenCL command queue" );
449 template <
typename Callback>
455 operator T&() {
return data(); }
456 operator const T&()
const {
return data(); }
480 __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
494 template <
typename K,
typename T,
typename Factory>
497 const T &t = dmsg.
data(
false );
499 return key_from_message<K, T>( t );
502 template <
typename Factory>
560 my_factory->enqueue_unmap_buffer(device, *
this,
d);
590 template <
typename Factory>
// Describe the slice of the parent buffer m that the sub-buffer covers;
// index and size initialize the cl_buffer_region aggregate in that order.
603 cl_buffer_region region = { index,
size };
// clCreateSubBuffer takes a pointer to the region descriptor. The argument
// had been corrupted to "®ion" (an "&reg" sequence decoded as the
// registered-sign character); it is restored to the address of region.
604 this->
my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err );
613 this->
my_factory->enqueue_map_buffer( device, *
this, dmsg );
617 template <
typename,
typename>
628 template <
typename T,
typename Factory = opencl_info::default_opencl_factory>
631 template <
typename T,
typename Factory = opencl_info::default_opencl_factory>
640 template <access_type a>
642 T* ptr = (T*)
my_impl->get_host_ptr();
647 T*
data()
const {
return &access<read_write>()[0]; }
649 template <access_type a = read_write>
652 template <access_type a = read_write>
674 const cl_event *e =
d.get_event();
681 const cl_event *e =
d.get_event();
701 template <
typename,
typename>
705 template <
typename T,
typename Factory>
714 template <
typename T,
typename Factory>
// is_typedef(type): expands to a detection template built on two check()
// overloads. In `typename C::type*` the word `type` is the macro parameter, so
// check<T>(0) selects the std::true_type overload only when T has a nested
// member type of the given name; otherwise the variadic std::false_type
// overload is chosen. `value` exposes the result as a compile-time bool.
720 #define is_typedef(type) \
721 template <typename T> \
723 template <typename C> \
724 static std::true_type check( typename C::type* ); \
725 template <typename C> \
726 static std::false_type check( ... ); \
728 static const bool value = decltype(check<T>(0))::value; \
734 template <
typename T>
736 return t.native_object();
739 template <
typename T>
745 template <
typename T,
typename Factory>
747 const T &t = dmsg.
data(
false );
748 typedef typename T::memory_object_type mem_obj_t;
749 mem_obj_t mem_obj = t.memory_object();
752 mem_obj.send( device,
d );
753 if (
d.get_event() ) dmsg.
set_event( *
d.get_event() );
756 template <
typename T>
758 typedef typename T::memory_object_type mem_obj_t;
759 mem_obj_t mem_obj = t.memory_object();
761 mem_obj.send( device, dmsg );
764 template <
typename T>
768 template <
typename T,
typename Factory>
770 const T &t = dmsg.
data(
false );
771 typedef typename T::memory_object_type mem_obj_t;
772 mem_obj_t mem_obj = t.memory_object();
775 mem_obj.receive(
d );
776 if (
d.get_event() ) dmsg.
set_event( *
d.get_event() );
779 template <
typename T>
787 template <
typename G = std::initializer_list<
int>,
typename L = std::initializer_list<
int>,
788 typename =
typename std::enable_if<!std::is_same<
typename std::decay<G>::type, opencl_range>::value>::type>
789 opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
790 auto g_it = global_work.begin();
791 auto l_it = local_work.begin();
794 for (
int s = 0;
s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++
s) {
795 __TBB_ASSERT(l_it != local_work.end(),
"global_work & local_work must have same size");
809 template <
typename DeviceFilter>
// Fetch the kernel's function name (CL_KERNEL_FUNCTION_NAME) into a char
// buffer that is enlarged until the reported size fits.
// (ret_size is declared outside this excerpt.)
821 std::vector<char> kernel_name;
// curr_size is shifted left twice per pass: once in the resize() argument and
// once in the loop step. Buffer sizes therefore go 64, 256, 1024, ...
822 for (
size_t curr_size = 32;; curr_size <<= 1 ) {
823 kernel_name.resize( curr_size <<= 1 );
// NOTE(review): the OpenCL specification says clGetKernelInfo returns
// CL_INVALID_VALUE when the supplied size is smaller than the value being
// queried. If so, enforce_cl_retcode reports an error before the loop can
// retry with a larger buffer — confirm against the targeted runtimes.
824 enforce_cl_retcode( clGetKernelInfo( k.
my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ),
"Failed to get kernel info" );
// Done once the size reported back is strictly smaller than the buffer.
825 if ( ret_size < curr_size )
break;
848 template <
typename DeviceFilter_>
851 template <
typename Factory>
865 enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ),
"Failed to release a command queue" );
882 template <
typename Factory>
888 e1 == NULL ? 0 : 1, e1, &e2, &err );
890 dmsg.
data(
false ) = ptr;
896 template <
typename Factory>
902 "Failed to unmap a buffer" );
908 template <
size_t NUM_ARGS,
typename T>
911 enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++,
sizeof(
p), &
p),
"Failed to set a kernel argument" );
914 template <
size_t NUM_ARGS,
typename T,
typename F>
916 __TBB_ASSERT((
static_cast<typename std::array<cl_event, NUM_ARGS>::size_type
>(num_events) < events.size()), NULL);
918 const cl_event *
const e = msg.
get_event();
920 events[num_events++] = *e;
926 template <
size_t NUM_ARGS,
typename T,
typename ...Rest>
927 void process_arg_list(
const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events,
int& num_events,
int& place,
const T& t,
const Rest&... args ) {
932 template <
size_t NUM_ARGS>
935 template <
typename T>
938 template <
typename T,
typename F>
943 template <
typename T,
typename ...Rest>
952 template <
typename ...Args>
954 std::array<cl_event,
sizeof...(Args)> events;
959 const cl_event e =
send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
968 template <
typename T,
typename ...Rest>
979 const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
984 for (
s = 1;
s < 3 && g_size[
s] != size_t(-1); ++
s) {}
988 g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &
event ),
989 "Failed to enqueue a kernel" );
994 template <
typename T>
999 template <
typename T,
typename F>
1001 cl_event
const *e_ptr = msg.
get_event();
1003 if ( e_ptr != NULL ) {
1011 template <
typename T,
typename ...Rest>
1030 template<
typename Fn>
1039 __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1041 finalize_fn *
const fn_ptr = static_cast<finalize_fn*>(
data);
1042 __TBB_ASSERT(fn_ptr != NULL,
"Invalid finalize function pointer");
1049 template <
typename FinalizeFn,
typename ...Args>
1055 new finalize_fn_leaf<FinalizeFn>(
fn) ),
"Failed to set a callback" );
1093 enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM,
"All devices should be in the same platform");
1095 std::vector<cl_device_id> cl_device_ids;
1097 cl_device_ids.push_back((*d).my_cl_device_id);
1100 cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1102 cl_context ctx = clCreateContext(context_properties,
1103 (cl_uint)cl_device_ids.size(),
1104 cl_device_ids.data(),
1109 size_t device_counter = 0;
1111 (*d).my_device_id = device_counter++;
// Create the command queue for device *d in context ctx; the result is stored
// on the device at the end of this run of statements.
1113 cl_command_queue cq;
// Devices whose major_version() is at least 2 use
// clCreateCommandQueueWithProperties.
1115 if ((*d).major_version() >= 2) {
// Request out-of-order execution only when the device reports it for host
// queues; the property list is terminated by 0.
1116 if ((*d).out_of_order_exec_mode_on_host_present()) {
1117 cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1118 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
// Otherwise pass an empty (0-terminated) property list.
1120 cl_queue_properties props[] = { 0 };
1121 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
// Remaining path: a properties bitfield for clCreateCommandQueue, with the
// out-of-order bit set under the same device check.
1126 cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
// Compiler-specific pragmas wrapped around the clCreateCommandQueue call: GCC
// ignores -Wdeprecated-declarations; warnings 1478 and 4996 are disabled for
// the Intel / MSVC branch (presumably the equivalent deprecation warnings —
// confirm). The matching pop pragmas follow the call.
1128 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1129 #pragma GCC diagnostic push
1130 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1132 #if _MSC_VER || __INTEL_COMPILER
1133 #pragma warning( push )
1134 #if __INTEL_COMPILER
1135 #pragma warning (disable: 1478)
1137 #pragma warning (disable: 4996)
1140 cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1141 #if _MSC_VER || __INTEL_COMPILER
1142 #pragma warning( pop )
1144 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1145 #pragma GCC diagnostic pop
// Record the queue on the device object.
1149 (*d).my_cl_command_queue = cq;
1159 template <
typename Factory>
1161 template <
typename Factory>
1163 template <
typename Factory>
1168 namespace opencl_info {
1172 template <
typename Factory>
1175 __TBB_ASSERT(!f.devices().empty(),
"No available devices");
1176 return *(f.devices().begin());
1183 cl_platform_id platform_id = devices.
begin()->platform_id();
1185 if (it->platform_id() == platform_id) {
1210 template <
typename T,
typename Factory>
1220 template <
typename Factory = opencl_info::default_opencl_factory>
1252 cl_kernel kernel = clCreateKernel(
my_cl_program, k.c_str(), &err );
// Open filepath as a binary input stream.
1260 std::ifstream file_descriptor( filepath, std::ifstream::binary );
// If the stream did not open, build the message and write it to std::cerr.
1261 if ( !file_descriptor.is_open() ) {
1262 std::string str = std::string(
"Could not open file: " ) + filepath;
1263 std::cerr << str << std::endl;
// Determine the file length: seek to the end, read the stream position, then
// seek back to the beginning.
// NOTE(review): tellg() returns -1 on failure and the result is converted to
// size_t without a check — confirm that path cannot be reached here.
1266 file_descriptor.seekg( 0, file_descriptor.end );
1267 size_t length = size_t( file_descriptor.tellg() );
1268 file_descriptor.seekg( 0, file_descriptor.beg );
1272 file_descriptor.close();
1284 cl_uint num_devices, cl_device_id* device_list,
1287 cl_int err = clBuildProgram( program, num_devices, device_list, options,
1289 if( err == CL_SUCCESS )
1291 std::string str = std::string(
"Failed to build program: " ) +
name;
1292 if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1294 for (
auto d = devices.
begin();
d != devices.
end(); ++
d ) {
1295 std::cerr <<
"Build log for device: " << (*d).name() << std::endl;
1297 cl_int query_err = clGetProgramBuildInfo(
1298 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1302 std::vector<char> output;
1303 output.resize( log_size );
1304 query_err = clGetProgramBuildInfo(
1305 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1306 output.size(), output.data(), NULL );
1308 std::cerr << output.data() << std::endl;
1310 std::cerr <<
"No build log available" << std::endl;
1320 template<
typename Filter>
1322 Filter
filter,
const char* message ) {
// Scan device_list and drop every entry for which filter() returns true.
1323 for ( cl_uint i = 0; i < num_devices; ++i )
1324 if (
filter(device_list[i]) ) {
// Removal without shifting: the last entry overwrites slot i and num_devices
// shrinks by one. The post-decrement of i makes the loop's ++i revisit the
// same slot, which now holds the moved entry. For i == 0 the index wraps and
// is wrapped back by ++i (assuming cl_uint is unsigned, as in the OpenCL
// headers).
1325 device_list[i--] = device_list[--num_devices];
1332 void init(
const std::string& )
const {
1333 cl_uint num_devices;
1335 "Failed to get OpenCL context info" );
1338 cl_device_id *device_list = (cl_device_id *)alloca( num_devices*
sizeof( cl_device_id ) );
1339 enforce_cl_retcode( clGetContextInfo(
my_factory.context(), CL_CONTEXT_DEVICES, num_devices*
sizeof( cl_device_id ), device_list, NULL ),
1340 "Failed to get OpenCL context info" );
1341 const char *options = NULL;
1345 const char *
s[] = { fr.
content() };
1346 const size_t l[] = { fr.
length() };
1351 num_devices, device_list,
1353 return !
d.compiler_available() || !
d.linker_available();
1354 },
"No one device supports building program from sources" );
1357 options, NULL, NULL );
1361 options =
"-x spir";
1364 std::vector<const unsigned char*>
s(
1365 num_devices, reinterpret_cast<const unsigned char*>(fr.
content()) );
1366 std::vector<size_t> l( num_devices, fr.
length() );
1367 std::vector<cl_int> bin_statuses( num_devices, -1 );
1370 device_list, l.data(),
s.data(),
1371 bin_statuses.data(), &err );
1372 if( err != CL_SUCCESS ) {
1373 std::string statuses_str;
1374 for (
auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1375 statuses_str += std::to_string((*st));
1379 std::string(
", binary_statuses = " ) + statuses_str );
1383 options, NULL, NULL );
1397 template <
typename DeviceFilter>
1400 template <
typename DeviceFilter>
1404 template<typename... Args>
1407 template<typename JP, typename Factory, typename... Ports>
1408 class
opencl_node< tuple<Ports...>, JP, Factory > :
public streaming_node< tuple<Ports...>, JP, Factory > {
1409 typedef streaming_node < tuple<Ports...>, JP, Factory >
base_type;
1414 :
base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::
default_factory() )
1420 :
base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1425 template <
typename DeviceSelector>
1433 template<
typename JP,
typename... Ports>
1440 :
base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::
default_factory() )
1443 template <
typename DeviceSelector>
1449 template<
typename... Ports>
1450 class opencl_node< tuple<Ports...> > :
public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1456 :
base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::
default_factory() )
1459 template <
typename DeviceSelector>
1467 using interface11::opencl_node;
1471 using interface11::opencl_buffer;
1472 using interface11::opencl_subbuffer;
1473 using interface11::opencl_device;
1474 using interface11::opencl_device_list;
1475 using interface11::opencl_program;
1477 using interface11::opencl_async_msg;
1478 using interface11::opencl_factory;
1479 using interface11::opencl_range;
1486 #undef __TBB_flow_graph_opencl_node_H_include_area
1488 #endif // __TBB_flow_graph_opencl_node_H