--- OpenCL/OpenCL.pm 2012/04/29 18:24:35 1.63 +++ OpenCL/OpenCL.pm 2012/05/05 13:55:59 1.80 @@ -30,8 +30,8 @@ which represents basically a function call with argument values. OpenCL::Memory objects of various flavours: OpenCL::Buffer objects (flat -memory areas, think arrays or structs) and OpenCL::Image objects (think 2d -or 3d array) for bulk data and input and output for kernels. +memory areas, think arrays or structs) and OpenCL::Image objects (think 2D +or 3D array) for bulk data and input and output for kernels. OpenCL::Sampler objects, which are kind of like texture filter modes in OpenGL. @@ -45,7 +45,8 @@ =head2 HELPFUL RESOURCES -The OpenCL specs used to develop this module: +The OpenCL specs used to develop this module - download these and keep +them around, they are required reference material: http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf @@ -212,18 +213,21 @@ $ev->wait; =head2 Use the OpenGL module to share a texture between OpenCL and OpenGL and draw some julia -set tunnel effect. +set flight effect. -This is quite a long example to get you going. +This is quite a long example to get you going - you can download it from +L. 
use OpenGL ":all"; use OpenCL; + my $S = $ARGV[0] || 256; # window/texture size, smaller is faster + # open a window and create a gl texture - OpenGL::glpOpenWindow width => 256, height => 256; + OpenGL::glpOpenWindow width => $S, height => $S; my $texid = glGenTextures_p 1; glBindTexture GL_TEXTURE_2D, $texid; - glTexImage2D_c GL_TEXTURE_2D, 0, GL_RGBA8, 256, 256, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0; + glTexImage2D_c GL_TEXTURE_2D, 0, GL_RGBA8, $S, $S, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0; # find and use the first opencl device that let's us get a shared opengl context my $platform; @@ -252,19 +256,20 @@ kernel void juliatunnel (write_only image2d_t img, float time) { - float2 p = (float2)(get_global_id (0), get_global_id (1)) / 256.f * 2.f - 1.f; + int2 xy = (int2)(get_global_id (0), get_global_id (1)); + float2 p = convert_float2 (xy) / $S.f * 2.f - 1.f; - float2 m = (float2)(1.f, p.y) / fabs (p.x); - m.x = fabs (fmod (m.x + time * 0.05f, 4.f)) - 2.f; + float2 m = (float2)(1.f, p.y) / fabs (p.x); // tunnel + m.x = fabs (fmod (m.x + time * 0.05f, 4.f) - 2.f); float2 z = m; - float2 c = (float2)(sin (time * 0.05005), cos (time * 0.06001)); + float2 c = (float2)(sin (time * 0.01133f), cos (time * 0.02521f)); - for (int i = 0; i < 25 && dot (z, z) < 4.f; ++i) + for (int i = 0; i < 25 && dot (z, z) < 4.f; ++i) // standard julia z = (float2)(z.x * z.x - z.y * z.y, 2.f * z.x * z.y) + c; - float3 colour = (float3)(z.x, z.y, z.x * z.y); - write_imagef (img, (int2)(get_global_id (0), get_global_id (1)), (float4)(colour * p.x * p.x, 1.)); + float3 colour = (float3)(z.x, z.y, atan2 (z.y, z.x)); + write_imagef (img, xy, (float4)(colour * p.x * p.x, 1.)); } EOF @@ -278,9 +283,8 @@ $queue->acquire_gl_objects ([$tex]); # configure and run our kernel - $kernel->set_image2d (0, $tex); - $kernel->set_float (1, $time); - $queue->nd_range_kernel ($kernel, undef, [256, 256], undef); + $kernel->setf ("mf", $tex, $time*2); # mf = memory object, float + $queue->nd_range_kernel ($kernel, undef, 
[$S, $S], undef); # release objects to opengl again $queue->release_gl_objects ([$tex]); @@ -304,6 +308,50 @@ select undef, undef, undef, 1/60; } +=head2 How to modify the previous example to not rely on GL sharing. + +For those poor souls with only a sucky CPU OpenCL implementation, you +currently have to read the image into some perl scalar, and then modify a +texture (or use glDrawPixels or so). + +First, when you don't need gl sharing, you can create the context much more simply: + + $ctx = $platform->context (undef, [$dev]) + +To use a texture, you would modify the above example by creating an +OpenCL::Image manually instead of deriving it from a texture: + + my $tex = $ctx->image2d (OpenCL::MEM_WRITE_ONLY, OpenCL::RGBA, OpenCL::UNORM_INT8, $S, $S); + +And in the draw loop, instead of acquire_gl_objects/release_gl_objects, you +would read the image2d after the kernel has written it: + + $queue->read_image ($tex, 0, 0, 0, 0, $S, $S, 1, 0, 0, my $data); + +And then you would upload the pixel data to the texture (or use glDrawPixels): + + glTexSubImage2D_s GL_TEXTURE_2D, 0, 0, 0, $S, $S, GL_RGBA, GL_UNSIGNED_BYTE, $data; + +The fully modified example can be found at +L. + +=head2 Julia sets look soooo 80ies. + +Then colour them differently, e.g. using orbit traps! Replace the loop and +colour calculation from the previous examples by this: + + float2 dm = (float2)(1.f, 1.f); + + for (int i = 0; i < 25; ++i) + { + z = (float2)(z.x * z.x - z.y * z.y, 2.f * z.x * z.y) + c; + dm = fmin (dm, (float2)(fabs (dot (z, z) - 1.f), fabs (z.x - 1.f))); + } + + float3 colour = (float3)(dm.x * dm.y, dm.x * dm.y, dm.x); + +Also try C<-10.f> instead of C<-1.f>. + =head1 DOCUMENTATION =head2 BASIC CONVENTIONS @@ -342,12 +390,50 @@ context, no event is created. In all other contexts an event is returned by the method. -=item * This module expects all functions to return C. If any +=item * This module expects all functions to return C. 
If any other status is returned the function will throw an exception, so you don't normally have to to any error checking. =back +=head2 CONSTANTS + +All C<CL_xxx> constants that this module supports are always available +in the C<OpenCL> namespace as C<OpenCL::xxx> (i.e. without the C<CL_> +prefix). Constants which are not defined in the header files used during +compilation, or otherwise are not available, will have the value C<-1>. + +=head2 OPENCL 1.1 VS. OPENCL 1.2 + +This module supports both OpenCL version 1.1 and 1.2, although the OpenCL +1.2 interface hasn't been tested much for lack of availability of an +actual implementation. + +Every function or method in this manual page that interfaces to a +particular OpenCL function has a link to its C manual page. + +If the link contains a F<1.1>, then this function is an OpenCL 1.1 +function. Most but not all also exist in OpenCL 1.2, and this module +tries to emulate the missing ones for you, when told to do so at +compile time. You can check whether a function was removed in OpenCL 1.2 by +replacing the F<1.1> component in the URL by F<1.2>. + +If the link contains a F<1.2>, then this is an OpenCL 1.2-only +function. Even if the module was compiled with OpenCL 1.2 header files +and has a 1.2 OpenCL library, calling such a function on a platform that +doesn't implement 1.2 causes undefined behaviour, usually a crash (but +this is not guaranteed). + +You can find out whether this module was compiled to prefer 1.1 +functionality by looking at C - if it is true, then +1.1 functions generally are implemented using 1.1 OpenCL functions. If it +is false, then 1.1 functions missing from 1.2 are emulated by calling 1.2 +functions. + +This is a somewhat sorry state of affairs, but the Khronos group chose to +make every release of OpenCL source and binary incompatible with previous +releases. + =head2 PERL AND OPENCL TYPES This handy(?) 
table lists OpenCL types and their perl, PDL and pack/unpack @@ -388,13 +474,20 @@ perhaps most important, to signal completion of queued jobs (by setting callbacks on OpenCL::Event objects). +The OpenCL module converts all these callbacks into events - you can +still register callbacks, but they are not executed when your OpenCL +implementation calls the actual callback, but only later. Therefore, none +of the limitations of OpenCL callbacks apply to the perl implementation: +it is perfectly safe to make blocking operations from event callbacks, and +enqueued operations don't need to be flushed. + To facilitate this, this module maintains an event queue - each time an asynchronous event happens, it is queued, and perl will be interrupted. This is implemented via the L module. In addition, this module has L support, so it can seamlessly integrate itself into many event loops. -Since this module is a bit hard to understand, here are some case examples: +Since L is a bit hard to understand, here are some case examples: =head3 Don't use callbacks. @@ -501,7 +594,7 @@ our $POLL_FUNC; # set by XS BEGIN { - our $VERSION = '0.98'; + our $VERSION = '0.99'; require XSLoader; XSLoader::load (__PACKAGE__, $VERSION); @@ -516,6 +609,8 @@ @OpenCL::Kernel::ISA = @OpenCL::Event::ISA = OpenCL::Object::; + @OpenCL::SubDevice::ISA = OpenCL::Device::; + @OpenCL::Buffer::ISA = @OpenCL::Image::ISA = OpenCL::Memory::; @@ -529,6 +624,9 @@ @OpenCL::Image1DBuffer::ISA = OpenCL::Image::; @OpenCL::UserEvent::ISA = OpenCL::Event::; + + @OpenCL::MappedBuffer::ISA = + @OpenCL::MappedImage::ISA = OpenCL::Mapped::; } =head2 THE OpenCL PACKAGE @@ -562,13 +660,17 @@ =item $ctx = OpenCL::context_from_type $properties, $type = OpenCL::DEVICE_TYPE_DEFAULT, $callback->($err, $pvt) = $print_stderr Tries to create a context from a default device and platform type - never worked for me. +Consider using C<< $platform->context_from_type >> instead. 
+ +type: OpenCL::DEVICE_TYPE_DEFAULT, OpenCL::DEVICE_TYPE_CPU, OpenCL::DEVICE_TYPE_GPU, +OpenCL::DEVICE_TYPE_ACCELERATOR, OpenCL::DEVICE_TYPE_CUSTOM, OpenCL::DEVICE_TYPE_ALL. L =item $ctx = OpenCL::context $properties, \@devices, $callback->($err, $pvt) = $print_stderr) -Create a new OpenCL::Context object using the given device object(s). This -function isn't implemented yet, use C<< $platform->context >> instead. +Create a new OpenCL::Context object using the given device object(s). +Consider using C<< $platform->context >> instead. L @@ -659,7 +761,7 @@ =item $ctx = $platform->context ($properties, \@devices, $callback->($err, $pvt) = $print_stderr) Create a new OpenCL::Context object using the given device object(s)- a -CL_CONTEXT_PLATFORM property is supplied automatically. +OpenCL::CONTEXT_PLATFORM property is supplied automatically. L @@ -685,23 +787,23 @@ =item $string = $platform->profile -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $platform->version -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $platform->name -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $platform->vendor -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $platform->extensions -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end platform @@ -715,273 +817,305 @@ See C<< $platform->info >> for details. +type: OpenCL::DEVICE_TYPE_DEFAULT, OpenCL::DEVICE_TYPE_CPU, +OpenCL::DEVICE_TYPE_GPU, OpenCL::DEVICE_TYPE_ACCELERATOR, +OpenCL::DEVICE_TYPE_CUSTOM, OpenCL::DEVICE_TYPE_ALL. + +fp_config: OpenCL::FP_DENORM, OpenCL::FP_INF_NAN, OpenCL::FP_ROUND_TO_NEAREST, +OpenCL::FP_ROUND_TO_ZERO, OpenCL::FP_ROUND_TO_INF, OpenCL::FP_FMA, +OpenCL::FP_SOFT_FLOAT, OpenCL::FP_CORRECTLY_ROUNDED_DIVIDE_SQRT. 
+ +mem_cache_type: OpenCL::NONE, OpenCL::READ_ONLY_CACHE, OpenCL::READ_WRITE_CACHE. + +local_mem_type: OpenCL::LOCAL, OpenCL::GLOBAL. + +exec_capabilities: OpenCL::EXEC_KERNEL, OpenCL::EXEC_NATIVE_KERNEL. + +command_queue_properties: OpenCL::QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, +OpenCL::QUEUE_PROFILING_ENABLE. + +partition_properties: OpenCL::DEVICE_PARTITION_EQUALLY, +OpenCL::DEVICE_PARTITION_BY_COUNTS, OpenCL::DEVICE_PARTITION_BY_COUNTS_LIST_END, +OpenCL::DEVICE_PARTITION_BY_AFFINITY_DOMAIN. + +affinity_domain: OpenCL::DEVICE_AFFINITY_DOMAIN_NUMA, +OpenCL::DEVICE_AFFINITY_DOMAIN_L4_CACHE, OpenCL::DEVICE_AFFINITY_DOMAIN_L3_CACHE, +OpenCL::DEVICE_AFFINITY_DOMAIN_L2_CACHE, OpenCL::DEVICE_AFFINITY_DOMAIN_L1_CACHE, +OpenCL::DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE. + L +=item @devices = $device->sub_devices (\@properties) + +Creates OpenCL::SubDevice objects by partitioning an existing device. + +L + =for gengetinfo begin device =item $device_type = $device->type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->vendor_id -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_compute_units -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_work_item_dimensions -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->max_work_group_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item @ints = $device->max_work_item_sizes -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_char -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_short -Calls C with C and returns the result. +Calls C with C and returns the result. 
=item $uint = $device->preferred_vector_width_int -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_long -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_float -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_double -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_clock_frequency -Calls C with C and returns the result. +Calls C with C and returns the result. =item $bitfield = $device->address_bits -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_read_image_args -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_write_image_args -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->max_mem_alloc_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image2d_max_width -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image2d_max_height -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image3d_max_width -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image3d_max_height -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image3d_max_depth -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->image_support -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->max_parameter_size -Calls C with C and returns the result. +Calls C with C and returns the result. 
=item $uint = $device->max_samplers -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->mem_base_addr_align -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->min_data_type_align_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_fp_config = $device->single_fp_config -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_mem_cache_type = $device->global_mem_cache_type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->global_mem_cacheline_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->global_mem_cache_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->global_mem_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->max_constant_buffer_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_constant_args -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_local_mem_type = $device->local_mem_type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->local_mem_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->error_correction_support -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->profiling_timer_resolution -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->endian_little -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->available -Calls C with C and returns the result. 
+Calls C with C and returns the result. =item $boolean = $device->compiler_available -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_exec_capabilities = $device->execution_capabilities -Calls C with C and returns the result. +Calls C with C and returns the result. =item $command_queue_properties = $device->properties -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ = $device->platform -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->name -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->vendor -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->driver_version -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->profile -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->version -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->extensions -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_half -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_char -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_short -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_int -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_long -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_float -Calls C with C and returns the result. 
+Calls C with C and returns the result. =item $uint = $device->native_vector_width_double -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_half -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_fp_config = $device->double_fp_config -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_fp_config = $device->half_fp_config -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->host_unified_memory -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device = $device->parent_device_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =item @device_partition_property_exts = $device->partition_types_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =item @device_partition_property_exts = $device->affinity_domains_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->reference_count_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =item @device_partition_property_exts = $device->partition_style_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end device @@ -989,6 +1123,32 @@ =head2 THE OpenCL::Context CLASS +An OpenCL::Context is basically a container, or manager, for a number of +devices of a platform. It is used to create all sorts of secondary objects +such as buffers, queues, programs and so on. + +All context creation functions and methods take a list of properties +(type-value pairs). All property values can be specified as integers - +some additionally support other types: + +=over 4 + +=item OpenCL::CONTEXT_PLATFORM + +Also accepts OpenCL::Platform objects. 
+ +=item OpenCL::GLX_DISPLAY_KHR + +Also accepts C, in which case a deep and troubling hack is engaged +to find the current glx display (see L). + +=item OpenCL::GL_CONTEXT_KHR + +Also accepts C, in which case a deep and troubling hack is engaged +to find the current glx context (see L). + +=back + =over 4 =item $prog = $ctx->build_program ($program, $options = "") @@ -1045,6 +1205,10 @@ Creates a new OpenCL::Buffer (actually OpenCL::BufferObj) object with the given flags and octet-size. +flags: OpenCL::MEM_READ_WRITE, OpenCL::MEM_WRITE_ONLY, OpenCL::MEM_READ_ONLY, +OpenCL::MEM_USE_HOST_PTR, OpenCL::MEM_ALLOC_HOST_PTR, OpenCL::MEM_COPY_HOST_PTR, +OpenCL::MEM_HOST_WRITE_ONLY, OpenCL::MEM_HOST_READ_ONLY, OpenCL::MEM_HOST_NO_ACCESS. + L =item $buf = $ctx->buffer_sv ($flags, $data) @@ -1057,6 +1221,21 @@ Creates a new OpenCL::Image object and optionally initialises it with the given data values. +channel_order: OpenCL::R, OpenCL::A, OpenCL::RG, OpenCL::RA, OpenCL::RGB, +OpenCL::RGBA, OpenCL::BGRA, OpenCL::ARGB, OpenCL::INTENSITY, OpenCL::LUMINANCE, +OpenCL::Rx, OpenCL::RGx, OpenCL::RGBx. + +channel_type: OpenCL::SNORM_INT8, OpenCL::SNORM_INT16, OpenCL::UNORM_INT8, +OpenCL::UNORM_INT16, OpenCL::UNORM_SHORT_565, OpenCL::UNORM_SHORT_555, +OpenCL::UNORM_INT_101010, OpenCL::SIGNED_INT8, OpenCL::SIGNED_INT16, +OpenCL::SIGNED_INT32, OpenCL::UNSIGNED_INT8, OpenCL::UNSIGNED_INT16, +OpenCL::UNSIGNED_INT32, OpenCL::HALF_FLOAT, OpenCL::FLOAT. + +type: OpenCL::MEM_OBJECT_BUFFER, OpenCL::MEM_OBJECT_IMAGE2D, +OpenCL::MEM_OBJECT_IMAGE3D, OpenCL::MEM_OBJECT_IMAGE2D_ARRAY, +OpenCL::MEM_OBJECT_IMAGE1D, OpenCL::MEM_OBJECT_IMAGE1D_ARRAY, +OpenCL::MEM_OBJECT_IMAGE1D_BUFFER. + L =item $img = $ctx->image2d ($flags, $channel_order, $channel_type, $width, $height, $row_pitch = 0, $data = undef) @@ -1078,6 +1257,8 @@ Creates a new OpenCL::Buffer (actually OpenCL::BufferObj) object that refers to the given OpenGL buffer object. 
+flags: OpenCL::MEM_READ_WRITE, OpenCL::MEM_READ_ONLY, OpenCL::MEM_WRITE_ONLY. + http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateFromGLBuffer.html =item $img = $ctx->gl_texture ($flags, $target, $miplevel, $texture) @@ -1085,6 +1266,13 @@ Creates a new OpenCL::Image object that refers to the given OpenGL texture object or buffer. +target: GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, GL_TEXTURE_BUFFER, +GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, +GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, +GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, +GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, +GL_TEXTURE_RECTANGLE/GL_TEXTURE_RECTANGLE_ARB. + http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateFromGLTexture.html =item $img = $ctx->gl_texture2d ($flags, $target, $miplevel, $texture) @@ -1119,6 +1307,11 @@ Creates a new OpenCL::Sampler object. +addressing_mode: OpenCL::ADDRESS_NONE, OpenCL::ADDRESS_CLAMP_TO_EDGE, +OpenCL::ADDRESS_CLAMP, OpenCL::ADDRESS_REPEAT, OpenCL::ADDRESS_MIRRORED_REPEAT. + +filter_mode: OpenCL::FILTER_NEAREST, OpenCL::FILTER_LINEAR. + L =item $program = $ctx->program_with_source ($string) @@ -1127,6 +1320,30 @@ L +=item ($program, \@status) = $ctx->program_with_binary (\@devices, \@binaries) + +Creates a new OpenCL::Program object from the given binaries. + +L + +Example: clone an existing program object that contains a successfully +compiled program, no matter how useless this is. + + my $clone = $ctx->program_with_binary ([$prog->devices], [$prog->binaries]); + +=item $program = $ctx->program_with_built_in_kernels (\@devices, $kernel_names) + +Creates a new OpenCL::Program object from the given built-in kernel names. + +L + +=item $program = $ctx->link_program (\@devices, $options, \@programs, $cb->($program) = undef) + +Links all (already compiled) program objects specified in C<@programs> +together and returns a new OpenCL::Program object with the result. 
+ +L + =item $packed_value = $ctx->info ($name) See C<< $platform->info >> for details. @@ -1137,19 +1354,19 @@ =item $uint = $context->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item @devices = $context->devices -Calls C with C and returns the result. +Calls C with C and returns the result. =item @property_ints = $context->properties -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $context->num_devices -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end context @@ -1206,16 +1423,21 @@ http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWriteBufferRect.html -=item $ev = $queue->read_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) +=item $ev = $queue->copy_buffer_to_image ($src_buffer, $dst_image, $src_offset, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) -L +L -=item $ev = $queue->copy_buffer_to_image ($src_buffer, $dst_image, $src_offset, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) +=item $ev = $queue->read_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) + +C<$row_pitch> (and C<$slice_pitch>) can be C<0>, in which case the OpenCL +module uses the image width (and height) to supply default values. L =item $ev = $queue->write_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) +C<$row_pitch> (and C<$slice_pitch>) can be C<0>, in which case the OpenCL +module uses the image width (and height) to supply default values. L =item $ev = $queue->copy_image ($src_image, $dst_image, $src_x, $src_y, $src_z, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) @@ -1270,6 +1492,14 @@ L +=item $ev = $queue->migrate_mem_objects (\@mem_objects, $flags, $wait_events...) 
+ +Migrates a number of OpenCL::Memory objects to or from the device. + +flags: OpenCL::MIGRATE_MEM_OBJECT_HOST, OpenCL::MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED + +L + =item $ev = $queue->acquire_gl_objects ([object, ...], $wait_events...) Enqueues a list (an array-ref of OpenCL::Memory objects) to be acquired @@ -1314,24 +1544,93 @@ =item $ctx = $command_queue->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device = $command_queue->device -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $command_queue->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $command_queue_properties = $command_queue->properties -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end command_queue =back +=head3 MEMORY MAPPED BUFFERS + +OpenCL allows you to map buffers and images to host memory (read: perl +scalars). This is done much like reading or copying a buffer, by enqueuing +a map or unmap operation on the command queue. + +The map operations return an C object - see L section for details on what to do with these +objects. + +The object will be unmapped automatically when the mapped object is +destroyed (you can use a barrier to make sure the unmap has finished, +before using the buffer in a kernel), but you can also enqueue an unmap +operation manually. + +=over 4 + +=item $mapped_buffer = $queue->map_buffer ($buf, $blocking=1, $map_flags=OpenCL::MAP_READ|OpenCL::MAP_WRITE, $offset=0, $size=undef, $wait_events...) + +Maps the given buffer into host memory and returns an +C object. If C<$size> is specified as undef, then +the map will extend to the end of the buffer. + +map_flags: OpenCL::MAP_READ, OpenCL::MAP_WRITE, OpenCL::MAP_WRITE_INVALIDATE_REGION. + +L + +Example: map the buffer $buf fully and replace the first 4 bytes by "abcd", then unmap. 
+ + { + my $mapped = $queue->map_buffer ($buf, 1, OpenCL::MAP_WRITE); + substr $$mapped, 0, 4, "abcd"; + } # asynchronously unmap because $mapped is destroyed + +=item $mapped_image = $queue->map_image ($img, $blocking=1, $map_flags=OpenCL::MAP_READ|OpenCL::MAP_WRITE, $x=0, $y=0, $z=0, $width=undef, $height=undef, $depth=undef, $wait_events...) + +Maps the given image area into host memory and returns an +C<OpenCL::MappedImage> object. + +If any of C<$width>, C<$height> and/or C<$depth> are C<undef> then they +will be replaced by the maximum possible value. + +L + +Example: map an image (with OpenCL::UNSIGNED_INT8 channel type) and set +the first channel of the leftmost column to 5, then explicitly unmap +it. You are not necessarily meant to do it this way, this example just +shows you the accessors to use :) + + my $mapped = $queue->map_image ($image, 1, OpenCL::MAP_WRITE); + + $mapped->set ($_ * $mapped->row_pitch, pack "C", 5) + for 0 .. $image->height - 1; + + $mapped->unmap; + $mapped->wait; # only needed for out of order queues normally + +=item $ev = $queue->unmap ($mapped, $wait_events...) + +Unmaps the data from host memory. You must not call any methods that +modify the data, or modify the data scalar directly, after calling this +method. + +The mapped event object will always be passed as part of the +$wait_events. The mapped event object will be replaced by the new event +object that this request creates. + +=back + =head2 THE OpenCL::Memory CLASS This the superclass of all memory objects - OpenCL::Buffer, OpenCL::Image, @@ -1349,39 +1648,39 @@ =item $mem_object_type = $mem->type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $mem_flags = $mem->flags -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $mem->size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ptr_value = $mem->host_ptr -Calls C with C and returns the result. 
+Calls C with C and returns the result. 
=item $uint = $mem->map_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $mem->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $mem->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $mem = $mem->associated_memobject -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $mem->offset -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end mem @@ -1411,7 +1710,7 @@ =item $subbuf = $buf_obj->sub_buffer_region ($flags, $origin, $size) Creates an OpenCL::Buffer objects from this buffer and returns it. The -C is assumed to be C. +C is assumed to be C. L @@ -1437,33 +1736,33 @@ =item ($channel_order, $channel_data_type) = $image->format Returns the channel order and type used to create the image by calling -C with C. +C with C. =for gengetinfo begin image =item $int = $image->element_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->row_pitch -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->slice_pitch -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->width -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->height -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->depth -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end image @@ -1471,11 +1770,11 @@ =item $GLenum = $gl_texture->target -Calls C with C and returns the result. +Calls C with C and returns the result. =item $GLint = $gl_texture->gl_mipmap_level -Calls C with C and returns the result. +Calls C with C and returns the result. 
=for gengetinfo end gl_texture @@ -1495,23 +1794,23 @@ =item $uint = $sampler->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $sampler->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $sampler->normalized_coords -Calls C with C and returns the result. +Calls C with C and returns the result. =item $addressing_mode = $sampler->addressing_mode -Calls C with C and returns the result. +Calls C with C and returns the result. =item $filter_mode = $sampler->filter_mode -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end sampler @@ -1539,6 +1838,17 @@ Some implementations fail with C when the compilation state is successful but some later stage fails. +options: C<-D name>, C<-D name=definition>, C<-I dir>, +C<-cl-single-precision-constant>, C<-cl-denorms-are-zero>, +C<-cl-fp32-correctly-rounded-divide-sqrt>, C<-cl-opt-disable>, +C<-cl-mad-enable>, C<-cl-no-signed-zeros>, C<-cl-unsafe-math-optimizations>, +C<-cl-finite-math-only>, C<-cl-fast-relaxed-math>, +C<-w>, C<-Werror>, C<-cl-std=CL1.1/CL1.2>, C<-cl-kernel-arg-info>, +C<-create-library>, C<-enable-link-options>. + +build_status: OpenCL::BUILD_SUCCESS, OpenCL::BUILD_NONE, +OpenCL::BUILD_ERROR, OpenCL::BUILD_IN_PROGRESS. + L =item $program->build_async (\@devices = undef, $options = "", $cb->($program) = undef) @@ -1546,11 +1856,24 @@ Similar to C<< ->build >>, except it starts a thread, and never fails (you need to check the compilation status from the callback, or by polling). +=item $program->compile (\@devices = undef, $options = "", \%headers = undef, $cb->($program) = undef) + +Compiles the given program for the given devices (or all devices if +undef). If C<$headers> is given, it must be a hashref with include name => +OpenCL::Program pairs. 
+ +L + =item $packed_value = $program->build_info ($device, $name) Similar to C<< $platform->info >>, but returns build info for a previous build attempt for the given device. +binary_type: OpenCL::PROGRAM_BINARY_TYPE_NONE, +OpenCL::PROGRAM_BINARY_TYPE_COMPILED_OBJECT, +OpenCL::PROGRAM_BINARY_TYPE_LIBRARY, +OpenCL::PROGRAM_BINARY_TYPE_EXECUTABLE. + L =item $kernel = $program->kernel ($function_name) @@ -1570,15 +1893,19 @@ =item $build_status = $program->build_status ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $program->build_options ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $program->build_log ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. + +=item $binary_type = $program->binary_type ($device) + +Calls C with C and returns the result. =for gengetinfo end program_build @@ -1592,27 +1919,27 @@ =item $uint = $program->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $program->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $program->num_devices -Calls C with C and returns the result. +Calls C with C and returns the result. =item @devices = $program->devices -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $program->source -Calls C with C and returns the result. +Calls C with C and returns the result. =item @ints = $program->binary_sizes -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end program @@ -1643,23 +1970,23 @@ =item $string = $kernel->function_name -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $kernel->num_args -Calls C with C and returns the result. +Calls C with C and returns the result. 
=item $uint = $kernel->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $kernel->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $program = $kernel->program -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end kernel @@ -1667,35 +1994,62 @@ See C<< $platform->info >> for details. -The reason this method is not called C is that there already is an -C<< ->info >> method. - L =for gengetinfo begin kernel_work_group =item $int = $kernel->work_group_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item @ints = $kernel->compile_work_group_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $kernel->local_mem_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $kernel->preferred_work_group_size_multiple ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $kernel->private_mem_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end kernel_work_group +=item $packed_value = $kernel->arg_info ($idx, $name) + +See C<< $platform->info >> for details. + +L + +=for gengetinfo begin kernel_arg + +=item $kernel_arg_address_qualifier = $kernel->arg_address_qualifier ($idx) + +Calls C with C and returns the result. + +=item $kernel_arg_access_qualifier = $kernel->arg_access_qualifier ($idx) + +Calls C with C and returns the result. + +=item $string = $kernel->arg_type_name ($idx) + +Calls C with C and returns the result. + +=item $kernel_arg_type_qualifier = $kernel->arg_type_qualifier ($idx) + +Calls C with C and returns the result. + +=item $string = $kernel->arg_name ($idx) + +Calls C with C and returns the result. 
+ +=for gengetinfo end kernel_arg + =item $kernel->setf ($format, ...) Sets the arguments of a kernel. Since OpenCL 1.1 doesn't have a generic @@ -1816,23 +2170,23 @@ =item $queue = $event->command_queue -Calls C with C and returns the result. +Calls C with C and returns the result. =item $command_type = $event->command_type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $event->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $event->command_execution_status -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $event->context -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end event @@ -1849,19 +2203,19 @@ =item $ulong = $event->profiling_command_queued -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $event->profiling_command_submit -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $event->profiling_command_start -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $event->profiling_command_end -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end profiling @@ -1878,10 +2232,113 @@ Sets the execution status of the user event. Can only be called once, either with OpenCL::COMPLETE or a negative number as status. +execution_status: OpenCL::COMPLETE or a negative integer. + L =back +=head2 THE OpenCL::Mapped CLASS + +This class represents objects mapped into host memory. They are +represented by a blessed string scalar. The string data is the mapped +memory area, that is, if you read or write it, then the mapped object is +accessed directly. 
+ +You must only ever use operations that modify the string in-place - for +example, a C<substr> that doesn't change the length, or maybe a regex that +doesn't change the length. Any other operation might cause the data to be +copied. + +When the object is destroyed it will enqueue an implicit unmap operation +on the queue that was used to create it. + +Keep in mind that you I<need> to unmap (or destroy) mapped objects before +OpenCL sees the changes, even if some implementations don't need this +sometimes. + +Example: replace the first two floats in the mapped buffer by 1 and 2. + + my $mapped = $queue->map_buffer ($buf, ... + $mapped->event->wait; # make sure it's there + + # now replace first 8 bytes by new data, which is exactly 8 bytes long + # we blindly assume device endianness to equal host endianness + # (and of course, we assume IEEE 754 single precision floats :) + substr $$mapped, 0, 8, pack "f*", 1, 2; + +=over 4 + +=item $ev = $mapped->unmap ($wait_events...) + +Unmaps the mapped memory object, using the queue originally used to create +it, quite similarly to C<< $queue->unmap ($mapped, ...) >>. + +=item $bool = $mapped->mapped + +Returns whether the object is still mapped - true before an C<unmap> is +enqueued, false afterwards. + +=item $ev = $mapped->event + +Returns the event object associated with the mapped object. Initially, this +will be the event object created when mapping the object, and after an +unmap, this will be the event object that the unmap operation created. + +=item $mapped->wait + +Same as C<< $mapped->event->wait >> - makes sure no operations on this +mapped object are outstanding. + +=item $bytes = $mapped->size + +Returns the size of the mapped area, in bytes. Same as C<length $$mapped>. + +=item $ptr = $mapped->ptr + +Returns the raw memory address of the mapped area. + +=item $mapped->set ($offset, $data) + +Replaces the data at the given C<$offset> in the memory area by the new +C<$data>. 
This method is safer than direct manipulation of C<$$mapped> +because it does bounds-checking, but also slower. + +=item $data = $mapped->get ($offset, $length) + +Returns (without copying) a scalar representing the data at the given +C<$offset> and C<$length> in the mapped memory area. This is the same as +the following substr, except much slower: + + $data = substr $$mapped, $offset, $length + +=cut + +sub OpenCL::Mapped::get { + substr ${$_[0]}, $_[1], $_[2] +} + +=back + +=head2 THE OpenCL::MappedBuffer CLASS + +This is a subclass of OpenCL::Mapped, representing mapped buffers. + +=head2 THE OpenCL::MappedImage CLASS + +This is a subclass of OpenCL::Mapped, representing mapped images. + +=over 4 + +=item $bytes = $mapped->row_pitch + +=item $bytes = $mapped->slice_pitch + +Return the row or slice pitch of the image that has been mapped. + +=back + + =cut 1;