--- OpenCL/OpenCL.pm 2012/04/28 13:33:04 1.62 +++ OpenCL/OpenCL.pm 2012/05/07 01:00:31 1.86 @@ -30,8 +30,8 @@ which represents basically a function call with argument values. OpenCL::Memory objects of various flavours: OpenCL::Buffer objects (flat -memory areas, think arrays or structs) and OpenCL::Image objects (think 2d -or 3d array) for bulk data and input and output for kernels. +memory areas, think arrays or structs) and OpenCL::Image objects (think 2D +or 3D array) for bulk data and input and output for kernels. OpenCL::Sampler objects, which are kind of like texture filter modes in OpenGL. @@ -45,7 +45,8 @@ =head2 HELPFUL RESOURCES -The OpenCL specs used to develop this module: +The OpenCL specs used to develop this module - download these and keep +them around, they are required reference material: http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf @@ -212,18 +213,21 @@ $ev->wait; =head2 Use the OpenGL module to share a texture between OpenCL and OpenGL and draw some julia -set tunnel effect. +set flight effect. -This is quite a long example to get you going. +This is quite a long example to get you going - you can also download it +from L. 
use OpenGL ":all"; use OpenCL; + my $S = $ARGV[0] || 256; # window/texture size, smaller is faster + # open a window and create a gl texture - OpenGL::glpOpenWindow width => 256, height => 256; + OpenGL::glpOpenWindow width => $S, height => $S; my $texid = glGenTextures_p 1; glBindTexture GL_TEXTURE_2D, $texid; - glTexImage2D_c GL_TEXTURE_2D, 0, GL_RGBA8, 256, 256, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0; + glTexImage2D_c GL_TEXTURE_2D, 0, GL_RGBA8, $S, $S, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0; # find and use the first opencl device that let's us get a shared opengl context my $platform; @@ -252,19 +256,20 @@ kernel void juliatunnel (write_only image2d_t img, float time) { - float2 p = (float2)(get_global_id (0), get_global_id (1)) / 256.f * 2.f - 1.f; + int2 xy = (int2)(get_global_id (0), get_global_id (1)); + float2 p = convert_float2 (xy) / $S.f * 2.f - 1.f; - float2 m = (float2)(1.f, p.y) / fabs (p.x); - m.x = fabs (fmod (m.x + time * 0.05f, 4.f)) - 2.f; + float2 m = (float2)(1.f, p.y) / fabs (p.x); // tunnel + m.x = fabs (fmod (m.x + time * 0.05f, 4.f) - 2.f); float2 z = m; - float2 c = (float2)(sin (time * 0.05005), cos (time * 0.06001)); + float2 c = (float2)(sin (time * 0.01133f), cos (time * 0.02521f)); - for (int i = 0; i < 25 && dot (z, z) < 4.f; ++i) + for (int i = 0; i < 25 && dot (z, z) < 4.f; ++i) // standard julia z = (float2)(z.x * z.x - z.y * z.y, 2.f * z.x * z.y) + c; - float3 colour = (float3)(z.x, z.y, z.x * z.y); - write_imagef (img, (int2)(get_global_id (0), get_global_id (1)), (float4)(colour * p.x * p.x, 1.)); + float3 colour = (float3)(z.x, z.y, atan2 (z.y, z.x)); + write_imagef (img, xy, (float4)(colour * p.x * p.x, 1.)); } EOF @@ -278,9 +283,8 @@ $queue->acquire_gl_objects ([$tex]); # configure and run our kernel - $kernel->set_image2d (0, $tex); - $kernel->set_float (1, $time); - $queue->nd_range_kernel ($kernel, undef, [256, 256], undef); + $kernel->setf ("mf", $tex, $time*2); # mf = memory object, float + $queue->nd_range_kernel ($kernel, undef, 
[$S, $S], undef); # release objects to opengl again $queue->release_gl_objects ([$tex]); @@ -304,6 +308,50 @@ select undef, undef, undef, 1/60; } +=head2 How to modify the previous example to not rely on GL sharing. + +For those poor souls with only a sucky CPU OpenCL implementation, you +currently have to read the image into some perl scalar, and then modify a +texture (or use glDrawPixels or so). + +First, when you don't need gl sharing, you can create the context much simpler: + + $ctx = $platform->context (undef, [$dev]) + +To use a texture, you would modify the above example by creating an +OpenCL::Image manually instead of deriving it from a texture: + + my $tex = $ctx->image2d (OpenCL::MEM_WRITE_ONLY, OpenCL::RGBA, OpenCL::UNORM_INT8, $S, $S); + +And in the draw loop, instead of acquire_gl_objects/release_gl_objects, you +would read the image2d after the kernel has written it: + + $queue->read_image ($tex, 0, 0, 0, 0, $S, $S, 1, 0, 0, my $data); + +And then you would upload the pixel data to the texture (or use glDrawPixels): + + glTexSubImage2D_s GL_TEXTURE_2D, 0, 0, 0, $S, $S, GL_RGBA, GL_UNSIGNED_BYTE, $data; + +The fully modified example can be found at +L. + +=head2 Julia sets look soooo 80ies. + +Then colour them differently, e.g. using orbit traps! Replace the loop and +colour calculation from the previous examples by this: + + float2 dm = (float2)(1.f, 1.f); + + for (int i = 0; i < 25; ++i) + { + z = (float2)(z.x * z.x - z.y * z.y, 2.f * z.x * z.y) + c; + dm = fmin (dm, (float2)(fabs (dot (z, z) - 1.f), fabs (z.x - 1.f))); + } + + float3 colour = (float3)(dm.x * dm.y, dm.x * dm.y, dm.x); + +Also try C<-10.f> instead of C<-1.f>. + =head1 DOCUMENTATION =head2 BASIC CONVENTIONS @@ -342,12 +390,55 @@ context, no event is created. In all other contexts an event is returned by the method. -=item * This module expects all functions to return C. If any +=item * This module expects all functions to return C. 
If any other status is returned the function will throw an exception, so you don't normally have to do any error checking. =back +=head2 CONSTANTS + +All C constants that this module supports are always available +in the C namespace as C (i.e. without the C +prefix). Constants which are not defined in the header files used during +compilation, or otherwise are not available, will have the value C<0> (in +some cases, this will make them indistinguishable from real constants, +sorry). + +The latest version of this module knows and exports the constants +listed in L. + +=head2 OPENCL 1.1 VS. OPENCL 1.2 + +This module supports both OpenCL version 1.1 and 1.2, although the OpenCL +1.2 interface hasn't been tested much for lack of availability of an +actual implementation. + +Every function or method in this manual page that interfaces to a +particular OpenCL function has a link to its C manual page. + +If the link contains a F<1.1>, then this function is an OpenCL 1.1 +function. Most but not all also exist in OpenCL 1.2, and this module +tries to emulate the missing ones for you, when told to do so at +compile time. You can check whether a function was removed in OpenCL 1.2 by +replacing the F<1.1> component in the URL by F<1.2>. + +If the link contains a F<1.2>, then this is an OpenCL 1.2-only +function. Even if the module was compiled with OpenCL 1.2 header files +and has a 1.2 OpenCL library, calling such a function on a platform that +doesn't implement 1.2 causes undefined behaviour, usually a crash (but +this is not guaranteed). + +You can find out whether this module was compiled to prefer 1.1 +functionality by looking at C - if it is true, then +1.1 functions generally are implemented using 1.1 OpenCL functions. If it +is false, then 1.1 functions missing from 1.2 are emulated by calling 1.2 +functions. 
+ +This is a somewhat sorry state of affairs, but the Khronos group chose to +make every release of OpenCL source and binary incompatible with previous +releases. + =head2 PERL AND OPENCL TYPES This handy(?) table lists OpenCL types and their perl, PDL and pack/unpack @@ -388,13 +479,20 @@ perhaps most important, to signal completion of queued jobs (by setting callbacks on OpenCL::Event objects). +The OpenCL module converts all these callbacks into events - you can +still register callbacks, but they are not executed when your OpenCL +implementation calls the actual callback, but only later. Therefore, none +of the limitations of OpenCL callbacks apply to the perl implementation: +it is perfectly safe to make blocking operations from event callbacks, and +enqueued operations don't need to be flushed. + To facilitate this, this module maintains an event queue - each time an asynchronous event happens, it is queued, and perl will be interrupted. This is implemented via the L<Async::Interrupt> module. In addition, this module has L<AnyEvent> support, so it can seamlessly integrate itself into many event loops. -Since this module is a bit hard to understand, here are some case examples: +Since L<Async::Interrupt> is a bit hard to understand, here are some case examples: =head3 Don't use callbacks. @@ -501,7 +599,7 @@ our $POLL_FUNC; # set by XS BEGIN { - our $VERSION = '0.98'; + our $VERSION = '1.0'; require XSLoader; XSLoader::load (__PACKAGE__, $VERSION); @@ -516,6 +614,8 @@ @OpenCL::Kernel::ISA = @OpenCL::Event::ISA = OpenCL::Object::; + @OpenCL::SubDevice::ISA = OpenCL::Device::; + @OpenCL::Buffer::ISA = @OpenCL::Image::ISA = OpenCL::Memory::; @@ -529,6 +629,9 @@ @OpenCL::Image1DBuffer::ISA = OpenCL::Image::; @OpenCL::UserEvent::ISA = OpenCL::Event::; + + @OpenCL::MappedBuffer::ISA = + @OpenCL::MappedImage::ISA = OpenCL::Mapped::; } =head2 THE OpenCL PACKAGE @@ -542,9 +645,12 @@ =item $str = OpenCL::err2str [$errval] -Converts an error value into a human readable string. 
IF no error value is +Converts an error value into a human readable string. If no error value is given, then the last error will be used (as returned by OpenCL::errno). +The latest version of this module knows the error constants +listed in L. + =item $str = OpenCL::enum2str $enum Converts most enum values (of parameter names, image format constants, @@ -553,6 +659,9 @@ very helpful to pass it through this function to maybe get some readable string out of it. +The latest version of this module knows the enumeration constants +listed in L. + =item @platforms = OpenCL::platforms Returns all available OpenCL::Platform objects. @@ -562,13 +671,17 @@ =item $ctx = OpenCL::context_from_type $properties, $type = OpenCL::DEVICE_TYPE_DEFAULT, $callback->($err, $pvt) = $print_stderr Tries to create a context from a default device and platform type - never worked for me. +Consider using C<< $platform->context_from_type >> instead. + +type: OpenCL::DEVICE_TYPE_DEFAULT, OpenCL::DEVICE_TYPE_CPU, OpenCL::DEVICE_TYPE_GPU, +OpenCL::DEVICE_TYPE_ACCELERATOR, OpenCL::DEVICE_TYPE_CUSTOM, OpenCL::DEVICE_TYPE_ALL. L =item $ctx = OpenCL::context $properties, \@devices, $callback->($err, $pvt) = $print_stderr) -Create a new OpenCL::Context object using the given device object(s). This -function isn't implemented yet, use C<< $platform->context >> instead. +Create a new OpenCL::Context object using the given device object(s). +Consider using C<< $platform->context >> instead. L @@ -659,7 +772,7 @@ =item $ctx = $platform->context ($properties, \@devices, $callback->($err, $pvt) = $print_stderr) Create a new OpenCL::Context object using the given device object(s)- a -CL_CONTEXT_PLATFORM property is supplied automatically. +OpenCL::CONTEXT_PLATFORM property is supplied automatically. L @@ -685,23 +798,23 @@ =item $string = $platform->profile -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $platform->version -Calls C with C and returns the result. 
+Calls C with C and returns the result. =item $string = $platform->name -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $platform->vendor -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $platform->extensions -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end platform @@ -715,273 +828,305 @@ See C<< $platform->info >> for details. +type: OpenCL::DEVICE_TYPE_DEFAULT, OpenCL::DEVICE_TYPE_CPU, +OpenCL::DEVICE_TYPE_GPU, OpenCL::DEVICE_TYPE_ACCELERATOR, +OpenCL::DEVICE_TYPE_CUSTOM, OpenCL::DEVICE_TYPE_ALL. + +fp_config: OpenCL::FP_DENORM, OpenCL::FP_INF_NAN, OpenCL::FP_ROUND_TO_NEAREST, +OpenCL::FP_ROUND_TO_ZERO, OpenCL::FP_ROUND_TO_INF, OpenCL::FP_FMA, +OpenCL::FP_SOFT_FLOAT, OpenCL::FP_CORRECTLY_ROUNDED_DIVIDE_SQRT. + +mem_cache_type: OpenCL::NONE, OpenCL::READ_ONLY_CACHE, OpenCL::READ_WRITE_CACHE. + +local_mem_type: OpenCL::LOCAL, OpenCL::GLOBAL. + +exec_capabilities: OpenCL::EXEC_KERNEL, OpenCL::EXEC_NATIVE_KERNEL. + +command_queue_properties: OpenCL::QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, +OpenCL::QUEUE_PROFILING_ENABLE. + +partition_properties: OpenCL::DEVICE_PARTITION_EQUALLY, +OpenCL::DEVICE_PARTITION_BY_COUNTS, OpenCL::DEVICE_PARTITION_BY_COUNTS_LIST_END, +OpenCL::DEVICE_PARTITION_BY_AFFINITY_DOMAIN. + +affinity_domain: OpenCL::DEVICE_AFFINITY_DOMAIN_NUMA, +OpenCL::DEVICE_AFFINITY_DOMAIN_L4_CACHE, OpenCL::DEVICE_AFFINITY_DOMAIN_L3_CACHE, +OpenCL::DEVICE_AFFINITY_DOMAIN_L2_CACHE, OpenCL::DEVICE_AFFINITY_DOMAIN_L1_CACHE, +OpenCL::DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE. + L +=item @devices = $device->sub_devices (\@properties) + +Creates OpenCL::SubDevice objects by partitioning an existing device. + +L + =for gengetinfo begin device =item $device_type = $device->type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->vendor_id -Calls C with C and returns the result. 
+Calls C with C and returns the result. =item $uint = $device->max_compute_units -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_work_item_dimensions -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->max_work_group_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item @ints = $device->max_work_item_sizes -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_char -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_short -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_int -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_long -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_float -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_double -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_clock_frequency -Calls C with C and returns the result. +Calls C with C and returns the result. =item $bitfield = $device->address_bits -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_read_image_args -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_write_image_args -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->max_mem_alloc_size -Calls C with C and returns the result. +Calls C with C and returns the result. 
=item $int = $device->image2d_max_width -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image2d_max_height -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image3d_max_width -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image3d_max_height -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->image3d_max_depth -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->image_support -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->max_parameter_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->max_samplers -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->mem_base_addr_align -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->min_data_type_align_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_fp_config = $device->single_fp_config -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_mem_cache_type = $device->global_mem_cache_type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->global_mem_cacheline_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->global_mem_cache_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->global_mem_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->max_constant_buffer_size -Calls C with C and returns the result. +Calls C with C and returns the result. 
=item $uint = $device->max_constant_args -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_local_mem_type = $device->local_mem_type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $device->local_mem_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->error_correction_support -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $device->profiling_timer_resolution -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->endian_little -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->available -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->compiler_available -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_exec_capabilities = $device->execution_capabilities -Calls C with C and returns the result. +Calls C with C and returns the result. =item $command_queue_properties = $device->properties -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ = $device->platform -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->name -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->vendor -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->driver_version -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->profile -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $device->version -Calls C with C and returns the result. +Calls C with C and returns the result. 
=item $string = $device->extensions -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->preferred_vector_width_half -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_char -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_short -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_int -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_long -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_float -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_double -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $device->native_vector_width_half -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_fp_config = $device->double_fp_config -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device_fp_config = $device->half_fp_config -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $device->host_unified_memory -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device = $device->parent_device_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =item @device_partition_property_exts = $device->partition_types_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =item @device_partition_property_exts = $device->affinity_domains_ext -Calls C with C and returns the result. +Calls C with C and returns the result. 
=item $uint = $device->reference_count_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =item @device_partition_property_exts = $device->partition_style_ext -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end device @@ -989,6 +1134,32 @@ =head2 THE OpenCL::Context CLASS +An OpenCL::Context is basically a container, or manager, for a number of +devices of a platform. It is used to create all sorts of secondary objects +such as buffers, queues, programs and so on. + +All context creation functions and methods take a list of properties +(type-value pairs). All property values can be specified as integers - +some additionally support other types: + +=over 4 + +=item OpenCL::CONTEXT_PLATFORM + +Also accepts OpenCL::Platform objects. + +=item OpenCL::GLX_DISPLAY_KHR + +Also accepts C, in which case a deep and troubling hack is engaged +to find the current glx display (see L). + +=item OpenCL::GL_CONTEXT_KHR + +Also accepts C, in which case a deep and troubling hack is engaged +to find the current glx context (see L). + +=back + =over 4 =item $prog = $ctx->build_program ($program, $options = "") @@ -1011,6 +1182,7 @@ eval { $prog->build (undef, $options); 1 } or errno == BUILD_PROGRAM_FAILURE + or errno == INVALID_BINARY # workaround nvidia bug or Carp::croak "OpenCL::Context->build_program: " . err2str; # we check status for all devices @@ -1044,6 +1216,10 @@ Creates a new OpenCL::Buffer (actually OpenCL::BufferObj) object with the given flags and octet-size. +flags: OpenCL::MEM_READ_WRITE, OpenCL::MEM_WRITE_ONLY, OpenCL::MEM_READ_ONLY, +OpenCL::MEM_USE_HOST_PTR, OpenCL::MEM_ALLOC_HOST_PTR, OpenCL::MEM_COPY_HOST_PTR, +OpenCL::MEM_HOST_WRITE_ONLY, OpenCL::MEM_HOST_READ_ONLY, OpenCL::MEM_HOST_NO_ACCESS. + L =item $buf = $ctx->buffer_sv ($flags, $data) @@ -1056,6 +1232,21 @@ Creates a new OpenCL::Image object and optionally initialises it with the given data values. 
+channel_order: OpenCL::R, OpenCL::A, OpenCL::RG, OpenCL::RA, OpenCL::RGB, +OpenCL::RGBA, OpenCL::BGRA, OpenCL::ARGB, OpenCL::INTENSITY, OpenCL::LUMINANCE, +OpenCL::Rx, OpenCL::RGx, OpenCL::RGBx. + +channel_type: OpenCL::SNORM_INT8, OpenCL::SNORM_INT16, OpenCL::UNORM_INT8, +OpenCL::UNORM_INT16, OpenCL::UNORM_SHORT_565, OpenCL::UNORM_SHORT_555, +OpenCL::UNORM_INT_101010, OpenCL::SIGNED_INT8, OpenCL::SIGNED_INT16, +OpenCL::SIGNED_INT32, OpenCL::UNSIGNED_INT8, OpenCL::UNSIGNED_INT16, +OpenCL::UNSIGNED_INT32, OpenCL::HALF_FLOAT, OpenCL::FLOAT. + +type: OpenCL::MEM_OBJECT_BUFFER, OpenCL::MEM_OBJECT_IMAGE2D, +OpenCL::MEM_OBJECT_IMAGE3D, OpenCL::MEM_OBJECT_IMAGE2D_ARRAY, +OpenCL::MEM_OBJECT_IMAGE1D, OpenCL::MEM_OBJECT_IMAGE1D_ARRAY, +OpenCL::MEM_OBJECT_IMAGE1D_BUFFER. + L =item $img = $ctx->image2d ($flags, $channel_order, $channel_type, $width, $height, $row_pitch = 0, $data = undef) @@ -1077,6 +1268,8 @@ Creates a new OpenCL::Buffer (actually OpenCL::BufferObj) object that refers to the given OpenGL buffer object. +flags: OpenCL::MEM_READ_WRITE, OpenCL::MEM_READ_ONLY, OpenCL::MEM_WRITE_ONLY. + http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateFromGLBuffer.html =item $img = $ctx->gl_texture ($flags, $target, $miplevel, $texture) @@ -1084,6 +1277,13 @@ Creates a new OpenCL::Image object that refers to the given OpenGL texture object or buffer. +target: GL_TEXTURE_1D, GL_TEXTURE_1D_ARRAY, GL_TEXTURE_BUFFER, +GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D, +GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, +GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, +GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, +GL_TEXTURE_RECTANGLE/GL_TEXTURE_RECTANGLE_ARB. + http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateFromGLTexture.html =item $img = $ctx->gl_texture2d ($flags, $target, $miplevel, $texture) @@ -1118,6 +1318,11 @@ Creates a new OpenCL::Sampler object. 
+addressing_mode: OpenCL::ADDRESS_NONE, OpenCL::ADDRESS_CLAMP_TO_EDGE, +OpenCL::ADDRESS_CLAMP, OpenCL::ADDRESS_REPEAT, OpenCL::ADDRESS_MIRRORED_REPEAT. + +filter_mode: OpenCL::FILTER_NEAREST, OpenCL::FILTER_LINEAR. + L =item $program = $ctx->program_with_source ($string) @@ -1126,6 +1331,30 @@ L +=item ($program, \@status) = $ctx->program_with_binary (\@devices, \@binaries) + +Creates a new OpenCL::Program object from the given binaries. + +L + +Example: clone an existing program object that contains a successfully +compiled program, no matter how useless this is. + + my $clone = $ctx->program_with_binary ([$prog->devices], [$prog->binaries]); + +=item $program = $ctx->program_with_built_in_kernels (\@devices, $kernel_names) + +Creates a new OpenCL::Program object from the given built-in kernel names. + +L + +=item $program = $ctx->link_program (\@devices, $options, \@programs, $cb->($program) = undef) + +Links all (already compiled) program objects specified in C<@programs> +together and returns a new OpenCL::Program object with the result. + +L + =item $packed_value = $ctx->info ($name) See C<< $platform->info >> for details. @@ -1136,19 +1365,19 @@ =item $uint = $context->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item @devices = $context->devices -Calls C with C and returns the result. +Calls C with C and returns the result. =item @property_ints = $context->properties -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $context->num_devices -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end context @@ -1197,24 +1426,29 @@ L -=item $ev = $queue->read_buffer_rect (OpenCL::Memory buf, cl_bool blocking, $buf_x, $buf_y, $buf_z, $host_x, $host_y, $host_z, $width, $height, $depth, $buf_row_pitch, $buf_slice_pitch, $host_row_pitch, $host_slice_pitch, $data, $wait_events...) 
+=item $ev = $queue->read_buffer_rect ($buf, cl_bool blocking, $buf_x, $buf_y, $buf_z, $host_x, $host_y, $host_z, $width, $height, $depth, $buf_row_pitch, $buf_slice_pitch, $host_row_pitch, $host_slice_pitch, $data, $wait_events...) http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueReadBufferRect.html -=item $ev = $queue->write_buffer_rect (OpenCL::Memory buf, cl_bool blocking, $buf_x, $buf_y, $buf_z, $host_x, $host_y, $host_z, $width, $height, $depth, $buf_row_pitch, $buf_slice_pitch, $host_row_pitch, $host_slice_pitch, $data, $wait_events...) +=item $ev = $queue->write_buffer_rect ($buf, $blocking, $buf_x, $buf_y, $buf_z, $host_x, $host_y, $host_z, $width, $height, $depth, $buf_row_pitch, $buf_slice_pitch, $host_row_pitch, $host_slice_pitch, $data, $wait_events...) http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWriteBufferRect.html -=item $ev = $queue->read_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) +=item $ev = $queue->copy_buffer_to_image ($src_buffer, $dst_image, $src_offset, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) -L +L -=item $ev = $queue->copy_buffer_to_image ($src_buffer, $dst_image, $src_offset, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) +=item $ev = $queue->read_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) + +C<$row_pitch> (and C<$slice_pitch>) can be C<0>, in which case the OpenCL +module uses the image width (and height) to supply default values. L =item $ev = $queue->write_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) +C<$row_pitch> (and C<$slice_pitch>) can be C<0>, in which case the OpenCL +module uses the image width (and height) to supply default values. 
L =item $ev = $queue->copy_image ($src_image, $dst_image, $src_x, $src_y, $src_z, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) 
@@ -1269,6 +1503,14 @@ L +=item $ev = $queue->migrate_mem_objects (\@mem_objects, $flags, $wait_events...) + +Migrates a number of OpenCL::Memory objects to or from the device. + +flags: OpenCL::MIGRATE_MEM_OBJECT_HOST, OpenCL::MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED + +L + =item $ev = $queue->acquire_gl_objects ([object, ...], $wait_events...) Enqueues a list (an array-ref of OpenCL::Memory objects) to be acquired @@ -1313,24 +1555,93 @@ =item $ctx = $command_queue->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $device = $command_queue->device -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $command_queue->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $command_queue_properties = $command_queue->properties -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end command_queue =back +=head3 MEMORY MAPPED BUFFERS + +OpenCL allows you to map buffers and images to host memory (read: perl +scalars). This is done much like reading or copying a buffer, by enqueuing +a map or unmap operation on the command queue. + +The map operations return an C<OpenCL::Mapped> object - see the L<THE OpenCL::Mapped CLASS> section for details on what to do with these +objects. + +The object will be unmapped automatically when the mapped object is +destroyed (you can use a barrier to make sure the unmap has finished, +before using the buffer in a kernel), but you can also enqueue an unmap +operation manually. + +=over 4 + +=item $mapped_buffer = $queue->map_buffer ($buf, $blocking=1, $map_flags=OpenCL::MAP_READ|OpenCL::MAP_WRITE, $offset=0, $size=undef, $wait_events...) + +Maps the given buffer into host memory and returns an +C<OpenCL::MappedBuffer> object. If C<$size> is specified as undef, then +the map will extend to the end of the buffer. + +map_flags: OpenCL::MAP_READ, OpenCL::MAP_WRITE, OpenCL::MAP_WRITE_INVALIDATE_REGION.
+ +L + +Example: map the buffer $buf fully and replace the first 4 bytes by "abcd", then unmap. + + { + my $mapped = $queue->map_buffer ($buf, 1, OpenCL::MAP_WRITE); + substr $$mapped, 0, 4, "abcd"; + } # asynchronously unmap because $mapped is destroyed + +=item $mapped_image = $queue->map_image ($img, $blocking=1, $map_flags=OpenCL::MAP_READ|OpenCL::MAP_WRITE, $x=0, $y=0, $z=0, $width=undef, $height=undef, $depth=undef, $wait_events...) + +Maps the given image area into host memory and returns an +C<OpenCL::MappedImage> object. + +If any of C<$width>, C<$height> and/or C<$depth> are C<undef> then they +will be replaced by the maximum possible value. + +L + +Example: map an image (with OpenCL::UNSIGNED_INT8 channel type) and set +the first channel of the leftmost column to 5, then explicitly unmap +it. You are not necessarily meant to do it this way, this example just +shows you the accessors to use :) + + my $mapped = $queue->map_image ($image, 1, OpenCL::MAP_WRITE); + + $mapped->set ($_ * $mapped->row_pitch, pack "C", 5) + for 0 .. $mapped->height - 1; + + $mapped->unmap; + $mapped->wait; # only needed for out of order queues normally + +=item $ev = $queue->unmap ($mapped, $wait_events...) + +Unmaps the data from host memory. You must not call any methods that +modify the data, or modify the data scalar directly, after calling this +method. + +The mapped event object will always be passed as part of the +$wait_events. The mapped event object will be replaced by the new event +object that this request creates. + +=back + =head2 THE OpenCL::Memory CLASS This is the superclass of all memory objects - OpenCL::Buffer, OpenCL::Image, @@ -1344,43 +1655,49 @@ L +=item $memory->destructor_callback ($cb->()) + +Sets a callback that will be invoked after the memory object is destructed. + +L + =for gengetinfo begin mem =item $mem_object_type = $mem->type -Calls C with C and returns the result. +Calls C with C and returns the result.
=item $mem_flags = $mem->flags -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $mem->size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ptr_value = $mem->host_ptr -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $mem->map_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $mem->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $mem->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $mem = $mem->associated_memobject -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $mem->offset -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end mem @@ -1410,7 +1727,7 @@ =item $subbuf = $buf_obj->sub_buffer_region ($flags, $origin, $size) Creates an OpenCL::Buffer objects from this buffer and returns it. The -C is assumed to be C. +C is assumed to be C. L @@ -1436,33 +1753,33 @@ =item ($channel_order, $channel_data_type) = $image->format Returns the channel order and type used to create the image by calling -C with C. +C with C. =for gengetinfo begin image =item $int = $image->element_size -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->row_pitch -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->slice_pitch -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->width -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->height -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $image->depth -Calls C with C and returns the result. +Calls C with C and returns the result. 
=for gengetinfo end image @@ -1470,11 +1787,11 @@ =item $GLenum = $gl_texture->target -Calls C with C and returns the result. +Calls C with C and returns the result. =item $GLint = $gl_texture->gl_mipmap_level -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end gl_texture @@ -1494,23 +1811,23 @@ =item $uint = $sampler->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $sampler->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $boolean = $sampler->normalized_coords -Calls C with C and returns the result. +Calls C with C and returns the result. =item $addressing_mode = $sampler->addressing_mode -Calls C with C and returns the result. +Calls C with C and returns the result. =item $filter_mode = $sampler->filter_mode -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end sampler @@ -1530,11 +1847,25 @@ compiling whether you use a callback or not. See C<build_async> if you want to make sure the build is done in the background. -Note that some OpenCL implementations atc up badly, and don't call the +Note that some OpenCL implementations act up badly, and don't call the callback in some error cases (but call it in others). This implementation assumes the callback will always be called, and leaks memory if this is not so. So best make sure you don't pass in invalid values. +Some implementations fail with C when the +compilation state is successful but some later stage fails. + +options: C<-D name>, C<-D name=definition>, C<-I dir>, +C<-cl-single-precision-constant>, C<-cl-denorms-are-zero>, +C<-cl-fp32-correctly-rounded-divide-sqrt>, C<-cl-opt-disable>, +C<-cl-mad-enable>, C<-cl-no-signed-zeros>, C<-cl-unsafe-math-optimizations>, +C<-cl-finite-math-only>, C<-cl-fast-relaxed-math>, +C<-w>, C<-Werror>, C<-cl-std=CL1.1/CL1.2>, C<-cl-kernel-arg-info>, +C<-create-library>, C<-enable-link-options>.
+ +build_status: OpenCL::BUILD_SUCCESS, OpenCL::BUILD_NONE, +OpenCL::BUILD_ERROR, OpenCL::BUILD_IN_PROGRESS. + L =item $program->build_async (\@devices = undef, $options = "", $cb->($program) = undef) @@ -1542,11 +1873,24 @@ Similar to C<< ->build >>, except it starts a thread, and never fails (you need to check the compilation status from the callback, or by polling). +=item $program->compile (\@devices = undef, $options = "", \%headers = undef, $cb->($program) = undef) + +Compiles the given program for the given devices (or all devices if +undef). If C<$headers> is given, it must be a hashref with include name => +OpenCL::Program pairs. + +L + =item $packed_value = $program->build_info ($device, $name) Similar to C<< $platform->info >>, but returns build info for a previous build attempt for the given device. +binary_type: OpenCL::PROGRAM_BINARY_TYPE_NONE, +OpenCL::PROGRAM_BINARY_TYPE_COMPILED_OBJECT, +OpenCL::PROGRAM_BINARY_TYPE_LIBRARY, +OpenCL::PROGRAM_BINARY_TYPE_EXECUTABLE. + L =item $kernel = $program->kernel ($function_name) @@ -1566,15 +1910,19 @@ =item $build_status = $program->build_status ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $program->build_options ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $program->build_log ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. + +=item $binary_type = $program->binary_type ($device) + +Calls C with C and returns the result. =for gengetinfo end program_build @@ -1588,27 +1936,27 @@ =item $uint = $program->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $program->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $program->num_devices -Calls C with C and returns the result. +Calls C with C and returns the result.
=item @devices = $program->devices -Calls C with C and returns the result. +Calls C with C and returns the result. =item $string = $program->source -Calls C with C and returns the result. +Calls C with C and returns the result. =item @ints = $program->binary_sizes -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end program @@ -1639,23 +1987,23 @@ =item $string = $kernel->function_name -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $kernel->num_args -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $kernel->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $kernel->context -Calls C with C and returns the result. +Calls C with C and returns the result. =item $program = $kernel->program -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end kernel @@ -1663,35 +2011,62 @@ See C<< $platform->info >> for details. -The reason this method is not called C is that there already is an -C<< ->info >> method. - L =for gengetinfo begin kernel_work_group =item $int = $kernel->work_group_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item @ints = $kernel->compile_work_group_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $kernel->local_mem_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $int = $kernel->preferred_work_group_size_multiple ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $kernel->private_mem_size ($device) -Calls C with C and returns the result. +Calls C with C and returns the result. 
=for gengetinfo end kernel_work_group +=item $packed_value = $kernel->arg_info ($idx, $name) + +See C<< $platform->info >> for details. + +L + +=for gengetinfo begin kernel_arg + +=item $kernel_arg_address_qualifier = $kernel->arg_address_qualifier ($idx) + +Calls C with C and returns the result. + +=item $kernel_arg_access_qualifier = $kernel->arg_access_qualifier ($idx) + +Calls C with C and returns the result. + +=item $string = $kernel->arg_type_name ($idx) + +Calls C with C and returns the result. + +=item $kernel_arg_type_qualifier = $kernel->arg_type_qualifier ($idx) + +Calls C with C and returns the result. + +=item $string = $kernel->arg_name ($idx) + +Calls C with C and returns the result. + +=for gengetinfo end kernel_arg + =item $kernel->setf ($format, ...) Sets the arguments of a kernel. Since OpenCL 1.1 doesn't have a generic @@ -1812,23 +2187,23 @@ =item $queue = $event->command_queue -Calls C with C and returns the result. +Calls C with C and returns the result. =item $command_type = $event->command_type -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $event->reference_count -Calls C with C and returns the result. +Calls C with C and returns the result. =item $uint = $event->command_execution_status -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ctx = $event->context -Calls C with C and returns the result. +Calls C with C and returns the result. =for gengetinfo end event @@ -1845,19 +2220,19 @@ =item $ulong = $event->profiling_command_queued -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $event->profiling_command_submit -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $event->profiling_command_start -Calls C with C and returns the result. +Calls C with C and returns the result. =item $ulong = $event->profiling_command_end -Calls C with C and returns the result. 
+Calls C with C and returns the result. =for gengetinfo end profiling @@ -1874,10 +2249,137 @@ Sets the execution status of the user event. Can only be called once, either with OpenCL::COMPLETE or a negative number as status. +execution_status: OpenCL::COMPLETE or a negative integer. + L =back +=head2 THE OpenCL::Mapped CLASS + +This class represents objects mapped into host memory. They are +represented by a blessed string scalar. The string data is the mapped +memory area, that is, if you read or write it, then the mapped object is +accessed directly. + +You must only ever use operations that modify the string in-place - for +example, a C<substr> that doesn't change the length, or maybe a regex that +doesn't change the length. Any other operation might cause the data to be +copied. + +When the object is destroyed it will enqueue an implicit unmap operation +on the queue that was used to create it. + +Keep in mind that you I<need> to unmap (or destroy) mapped objects before +OpenCL sees the changes, even if some implementations don't need this +sometimes. + +Example: replace the first two floats in the mapped buffer by 1 and 2. + + my $mapped = $queue->map_buffer ($buf, ... + $mapped->event->wait; # make sure it's there + + # now replace first 8 bytes by new data, which is exactly 8 bytes long + # we blindly assume device endianness to equal host endianness + # (and of course, we assume IEEE 754 single precision floats :) + substr $$mapped, 0, 8, pack "f*", 1, 2; + +=over 4 + +=item $ev = $mapped->unmap ($wait_events...) + +Unmaps the mapped memory object, using the queue originally used to create +it, quite similarly to C<< $queue->unmap ($mapped, ...) >>. + +=item $bool = $mapped->mapped + +Returns whether the object is still mapped - true before an C<unmap> is +enqueued, false afterwards. + +=item $ev = $mapped->event + +Return the event object associated with the mapped object.
Initially, this +will be the event object created when mapping the object, and after an +unmap, this will be the event object that the unmap operation created. + +=item $mapped->wait + +Same as C<< $mapped->event->wait >> - makes sure no operations on this +mapped object are outstanding. + +=item $bytes = $mapped->size + +Returns the size of the mapped area, in bytes. Same as C<length $$mapped>. + +=item $ptr = $mapped->ptr + +Returns the raw memory address of the mapped area. + +=item $mapped->set ($offset, $data) + +Replaces the data at the given C<$offset> in the memory area by the new +C<$data>. This method is safer than direct manipulation of C<$mapped> +because it does bounds-checking, but also slower. + +=item $data = $mapped->get ($offset, $length) + +Returns (without copying) a scalar representing the data at the given +C<$offset> and C<$length> in the mapped memory area. This is the same as +the following substr, except much slower: + + $data = substr $$mapped, $offset, $length + +=cut + +sub OpenCL::Mapped::get { + substr ${$_[0]}, $_[1], $_[2] +} + +=back + +=head2 THE OpenCL::MappedBuffer CLASS + +This is a subclass of OpenCL::Mapped, representing mapped buffers. + +=head2 THE OpenCL::MappedImage CLASS + +This is a subclass of OpenCL::Mapped, representing mapped images. + +=over 4 + +=item $pixels = $mapped->width + +=item $pixels = $mapped->height + +=item $pixels = $mapped->depth + +Return the width/height/depth of the mapped image region, in pixels. + +=item $bytes = $mapped->row_pitch + +=item $bytes = $mapped->slice_pitch + +Return the row or slice pitch of the image that has been mapped. + +=item $bytes = $mapped->element_size + +Return the size of a single pixel. + +=item $data = $mapped->get_row ($count, $x=0, $y=0, $z=0) + +Return C<$count> pixels from the given coordinates. The pixel data must +be completely contained within a single row. + +If C<$count> is C<undef>, then all the remaining pixels in that row are +returned.
+ +=item $mapped->set_row ($data, $x=0, $y=0, $z=0) + +Write the given pixel data at the given coordinate. The pixel data must +be completely contained within a single row. + +=back + =cut 1;