… | |
… | |
28 | specific device ("compiling and linking"), also binary programs. For each |
28 | specific device ("compiling and linking"), also binary programs. For each |
29 | kernel function in a program you can then create an OpenCL::Kernel object |
29 | kernel function in a program you can then create an OpenCL::Kernel object |
30 | which represents basically a function call with argument values. |
30 | which represents basically a function call with argument values. |
31 | |
31 | |
32 | OpenCL::Memory objects of various flavours: OpenCL::Buffer objects (flat |
32 | OpenCL::Memory objects of various flavours: OpenCL::Buffer objects (flat |
33 | memory areas, think array) and OpenCL::Image objects (think 2d or 3d |
33 | memory areas, think arrays or structs) and OpenCL::Image objects (think 2d |
34 | array) for bulk data and input and output for kernels. |
34 | or 3d array) for bulk data and input and output for kernels. |
35 | |
35 | |
36 | OpenCL::Sampler objects, which are kind of like texture filter modes in |
36 | OpenCL::Sampler objects, which are kind of like texture filter modes in |
37 | OpenGL. |
37 | OpenGL. |
38 | |
38 | |
39 | OpenCL::Queue objects - command queues, which allow you to submit memory |
39 | OpenCL::Queue objects - command queues, which allow you to submit memory |
… | |
… | |
51 | http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf |
51 | http://www.khronos.org/registry/cl/specs/opencl-1.1.pdf |
52 | |
52 | |
53 | OpenCL manpages: |
53 | OpenCL manpages: |
54 | |
54 | |
55 | http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/ |
55 | http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/ |
|
|
56 | |
|
|
57 | If you are into UML class diagrams, the following diagram might help - if |
|
|
58 | not, it will be mildly confusing: |
|
|
59 | |
|
|
60 | http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/classDiagram.html |
|
|
61 | |
|
|
62 | Here's a tutorial from AMD (very AMD-centric, too), not sure how useful it |
|
|
63 | is, but at least it's free of charge: |
|
|
64 | |
|
|
65 | http://developer.amd.com/zones/OpenCLZone/courses/Documents/Introduction_to_OpenCL_Programming%20Training_Guide%20%28201005%29.pdf |
|
|
66 | |
|
|
67 | And here's NVIDIA's OpenCL Best Practices Guide: |
|
|
68 | |
|
|
69 | http://developer.download.nvidia.com/compute/cuda/3_2/toolkit/docs/OpenCL_Best_Practices_Guide.pdf |
56 | |
70 | |
57 | =head1 BASIC WORKFLOW |
71 | =head1 BASIC WORKFLOW |
58 | |
72 | |
59 | To get something done, you basically have to do this once (refer to the |
73 | To get something done, you basically have to do this once (refer to the |
60 | examples below for actual code, this is just a high-level description): |
74 | examples below for actual code, this is just a high-level description): |
… | |
… | |
138 | |
152 | |
139 | my $src = ' |
153 | my $src = ' |
140 | __kernel void |
154 | __kernel void |
141 | squareit (__global float *input, __global float *output) |
155 | squareit (__global float *input, __global float *output) |
142 | { |
156 | { |
143 | size_t id = get_global_id (0); |
157 | size_t id = get_global_id (0); |
144 | output [id] = input [id] * input [id]; |
158 | output [id] = input [id] * input [id]; |
145 | } |
159 | } |
146 | '; |
160 | '; |
147 | |
161 | |
148 | my $prog = $ctx->program_with_source ($src); |
162 | my $prog = $ctx->program_with_source ($src); |
… | |
… | |
212 | |
226 | |
213 | =item * Object lifetime management is automatic - there is no need |
227 | =item * Object lifetime management is automatic - there is no need |
214 | to free objects explicitly (C<clReleaseXXX>), the release function |
228 | to free objects explicitly (C<clReleaseXXX>), the release function |
215 | is called automatically once all Perl references to it go away. |
229 | is called automatically once all Perl references to it go away. |
216 | |
230 | |
217 | =item * OpenCL uses CamelCase for function names (C<clGetPlatformInfo>), |
231 | =item * OpenCL uses CamelCase for function names (e.g. C<clGetPlatformIDs>, C<clGetPlatformInfo>), |
218 | while this module uses underscores as word separator and often leaves out |
232 | while this module uses underscores as word separator and often leaves out |
219 | prefixes (C<< $platform->info >>). |
233 | prefixes (C<OpenCL::platforms>, C<< $platform->info >>). |
220 | |
234 | |
221 | =item * OpenCL often specifies fixed vector function arguments as short |
235 | =item * OpenCL often specifies fixed vector function arguments as short |
222 | arrays (C<size_t origin[3]>), while this module explicitly expects the |
236 | arrays (C<size_t origin[3]>), while this module explicitly expects the |
223 | components as separate arguments- |
237 | components as separate arguments (C<$orig_x, $orig_y, $orig_z>) in |
|
|
238 | function calls. |
224 | |
239 | |
225 | =item * Structures are often specified with their components, and returned |
240 | =item * Structures are often specified by flattening out their components |
226 | as arrayrefs. |
241 | as with short vectors, and returned as arrayrefs. |
227 | |
|
|
228 | =item * Where possible, one of the pitch values is calculated from the |
|
|
229 | perl scalar length and need not be specified. |
|
|
230 | |
242 | |
231 | =item * When enqueuing commands, the wait list is specified by adding |
243 | =item * When enqueuing commands, the wait list is specified by adding |
232 | extra arguments to the function - anywhere a C<$wait_events...> argument |
244 | extra arguments to the function - anywhere a C<$wait_events...> argument |
233 | is documented this can be any number of event objects. |
245 | is documented this can be any number of event objects. |
234 | |
246 | |
… | |
… | |
374 | |
386 | |
375 | =item $buf = $ctx->buffer_sv ($flags, $data) |
387 | =item $buf = $ctx->buffer_sv ($flags, $data) |
376 | |
388 | |
377 | Creates a new OpenCL::Buffer object and initialises it with the given data values. |
389 | Creates a new OpenCL::Buffer object and initialises it with the given data values. |
378 | |
390 | |
379 | =item $img = $ctx->image2d ($flags, $channel_order, $channel_type, $width, $height, $data) |
391 | =item $img = $ctx->image2d ($flags, $channel_order, $channel_type, $width, $height, $row_pitch = 0, $data = undef) |
380 | |
392 | |
381 | Creates a new OpenCL::Image2D object and optionally initialises it with the given data values. |
393 | Creates a new OpenCL::Image2D object and optionally initialises it with the given data values. |
382 | |
394 | |
383 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateImage2D.html> |
395 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateImage2D.html> |
384 | |
396 | |
385 | =item $img = $ctx->image3d ($flags, $channel_order, $channel_type, $width, $height, $depth, $slice_pitch, $data) |
397 | =item $img = $ctx->image3d ($flags, $channel_order, $channel_type, $width, $height, $depth, $row_pitch = 0, $slice_pitch = 0, $data = undef) |
386 | |
398 | |
387 | Creates a new OpenCL::Image3D object and optionally initialises it with the given data values. |
399 | Creates a new OpenCL::Image3D object and optionally initialises it with the given data values. |
388 | |
400 | |
389 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateImage3D.html> |
401 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateImage3D.html> |
390 | |
402 | |
… | |
… | |
453 | |
465 | |
454 | =item $ev = $queue->enqueue_read_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) |
466 | =item $ev = $queue->enqueue_read_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) |
455 | |
467 | |
456 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueReadImage.html> |
468 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueReadImage.html> |
457 | |
469 | |
458 | =item $ev = $queue->enqueue_write_image ($src, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $data, $wait_events...) |
470 | =item $ev = $queue->enqueue_write_image ($dst, $blocking, $x, $y, $z, $width, $height, $depth, $row_pitch, $slice_pitch, $data, $wait_events...) |
459 | |
471 | |
460 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWriteImage.html> |
472 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWriteImage.html> |
461 | |
473 | |
462 | =item $ev = $queue->enqueue_copy_buffer_rect ($src, $dst, $src_x, $src_y, $src_z, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $src_row_pitch, $src_slice_pitch, 4dst_row_pitch, $dst_slice_pitch, $ait_event...) |
474 | =item $ev = $queue->enqueue_copy_buffer_rect ($src, $dst, $src_x, $src_y, $src_z, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $src_row_pitch, $src_slice_pitch, $dst_row_pitch, $dst_slice_pitch, $wait_events...) |
463 | |
475 | |
464 | Yeah. |
476 | Yeah. |
465 | |
477 | |
466 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyBufferRect.html> |
478 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyBufferRect.html> |
467 | |
479 | |
468 | =item $ev = $queue->enqueue_copy_buffer_to_image (OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) |
480 | =item $ev = $queue->enqueue_copy_buffer_to_image ($src_buffer, $dst_image, $src_offset, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) |
469 | |
481 | |
470 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyBufferToImage.html> |
482 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyBufferToImage.html> |
471 | |
483 | |
472 | =item $ev = $queue->enqueue_copy_image (OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) |
484 | =item $ev = $queue->enqueue_copy_image ($src_image, $dst_image, $src_x, $src_y, $src_z, $dst_x, $dst_y, $dst_z, $width, $height, $depth, $wait_events...) |
473 | |
485 | |
474 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyImage.html> |
486 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyImage.html> |
475 | |
487 | |
476 | =item $ev = $queue->enqueue_copy_image_to_buffer (OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...) |
488 | =item $ev = $queue->enqueue_copy_image_to_buffer ($src_image, $dst_buffer, $src_x, $src_y, $src_z, $width, $height, $depth, $dst_offset, $wait_events...) |
477 | |
489 | |
478 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyImageToBuffer.html> |
490 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueCopyImageToBuffer.html> |
479 | |
491 | |
480 | =item $ev = $queue->enqueue_task ($kernel, $wait_events...) |
492 | =item $ev = $queue->enqueue_task ($kernel, $wait_events...) |
481 | |
493 | |
… | |
… | |
646 | package OpenCL; |
658 | package OpenCL; |
647 | |
659 | |
648 | use common::sense; |
660 | use common::sense; |
649 | |
661 | |
650 | BEGIN { |
662 | BEGIN { |
651 | our $VERSION = '0.14'; |
663 | our $VERSION = '0.15'; |
652 | |
664 | |
653 | require XSLoader; |
665 | require XSLoader; |
654 | XSLoader::load (__PACKAGE__, $VERSION); |
666 | XSLoader::load (__PACKAGE__, $VERSION); |
655 | |
667 | |
656 | @OpenCL::Buffer::ISA = |
668 | @OpenCL::Buffer::ISA = |