… | |
… | |
160 | '; |
160 | '; |
161 | |
161 | |
162 | my $prog = $ctx->program_with_source ($src); |
162 | my $prog = $ctx->program_with_source ($src); |
163 | |
163 | |
164 | # build croaks on compile errors, so catch it and print the compile errors |
164 | # build croaks on compile errors, so catch it and print the compile errors |
165 | eval { $prog->build ($dev); 1 } |
165 | eval { $prog->build ($dev, "-cl-fast-relaxed-math"); 1 } |
166 | or die $prog->build_log; |
166 | or die $prog->build_log; |
167 | |
167 | |
168 | my $kernel = $prog->kernel ("squareit"); |
168 | my $kernel = $prog->kernel ("squareit"); |
169 | |
169 | |
170 | =head2 Create some input and output float buffers, then call the |
170 | =head2 Create some input and output float buffers, then call the |
… | |
… | |
260 | m.x = fabs (fmod (m.x + time * 0.05f, 4.f)) - 2.f; |
260 | m.x = fabs (fmod (m.x + time * 0.05f, 4.f)) - 2.f; |
261 | |
261 | |
262 | float2 z = m; |
262 | float2 z = m; |
263 | float2 c = (float2)(sin (time * 0.05005), cos (time * 0.06001)); |
263 | float2 c = (float2)(sin (time * 0.05005), cos (time * 0.06001)); |
264 | |
264 | |
265 | for (int i = 0; i < 100 && dot (z, z) < 4.f; ++i) |
265 | for (int i = 0; i < 25 && dot (z, z) < 4.f; ++i) |
266 | z = (float2)(z.x * z.x - z.y * z.y, 2.f * z.x * z.y) + c; |
266 | z = (float2)(z.x * z.x - z.y * z.y, 2.f * z.x * z.y) + c; |
267 | |
267 | |
268 | float3 colour = (float3)(z.x, z.y, z.x * z.y); |
268 | float3 colour = (float3)(z.x, z.y, z.x * z.y); |
269 | write_imagef (img, (int2)(get_global_id (0), get_global_id (1)), (float4)(colour * p.x * p.x, 1.)); |
269 | write_imagef (img, (int2)(get_global_id (0), get_global_id (1)), (float4)(colour * p.x * p.x, 1.)); |
270 | } |
270 | } |
… | |
… | |
288 | |
288 | |
289 | # release objects to opengl again |
289 | # release objects to opengl again |
290 | $queue->enqueue_release_gl_objects ([$tex]); |
290 | $queue->enqueue_release_gl_objects ([$tex]); |
291 | |
291 | |
292 | # wait |
292 | # wait |
293 | $queue->flush; |
293 | $queue->finish; |
294 | |
294 | |
295 | # now draw the texture, the defaults should be all right |
295 | # now draw the texture, the defaults should be all right |
296 | glTexParameterf GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST; |
296 | glTexParameterf GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST; |
297 | |
297 | |
298 | glEnable GL_TEXTURE_2D; |
298 | glEnable GL_TEXTURE_2D; |
… | |
… | |
336 | =item * Structures are often specified by flattening out their components |
336 | =item * Structures are often specified by flattening out their components |
337 | as with short vectors, and returned as arrayrefs. |
337 | as with short vectors, and returned as arrayrefs. |
338 | |
338 | |
339 | =item * When enqueuing commands, the wait list is specified by adding |
339 | =item * When enqueuing commands, the wait list is specified by adding |
340 | extra arguments to the function - anywhere a C<$wait_events...> argument |
340 | extra arguments to the function - anywhere a C<$wait_events...> argument |
341 | is documented this can be any number of event objects. |
341 | is documented this can be any number of event objects. As an extension |
|
|
342 | implemented by this module, C<undef> values will be ignored in the event |
|
|
343 | list. |
342 | |
344 | |
343 | =item * When enqueuing commands, if the enqueue method is called in void |
345 | =item * When enqueuing commands, if the enqueue method is called in void |
344 | context, no event is created. In all other contexts an event is returned |
346 | context, no event is created. In all other contexts an event is returned |
345 | by the method. |
347 | by the method. |
346 | |
348 | |
… | |
… | |
748 | |
750 | |
749 | =item @device_partition_property_exts = $device->affinity_domains_ext |
751 | =item @device_partition_property_exts = $device->affinity_domains_ext |
750 | |
752 | |
751 | Calls C<clGetDeviceInfo> with C<CL_DEVICE_AFFINITY_DOMAINS_EXT> and returns the result. |
753 | Calls C<clGetDeviceInfo> with C<CL_DEVICE_AFFINITY_DOMAINS_EXT> and returns the result. |
752 | |
754 | |
753 | =item $uint = $device->reference_count_ext |
755 | =item $uint = $device->reference_count_ext |
754 | |
756 | |
755 | Calls C<clGetDeviceInfo> with C<CL_DEVICE_REFERENCE_COUNT_EXT> and returns the result. |
757 | Calls C<clGetDeviceInfo> with C<CL_DEVICE_REFERENCE_COUNT_EXT> and returns the result. |
756 | |
758 | |
757 | =item @device_partition_property_exts = $device->partition_style_ext |
759 | =item @device_partition_property_exts = $device->partition_style_ext |
758 | |
760 | |
… | |
… | |
769 | =item $queue = $ctx->queue ($device, $properties) |
771 | =item $queue = $ctx->queue ($device, $properties) |
770 | |
772 | |
771 | Create a new OpenCL::Queue object from the context and the given device. |
773 | Create a new OpenCL::Queue object from the context and the given device. |
772 | |
774 | |
773 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateCommandQueue.html> |
775 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clCreateCommandQueue.html> |
|
|
776 | |
|
|
777 | Example: create an out-of-order queue. |
|
|
778 | |
|
|
779 | $queue = $ctx->queue ($device, OpenCL::QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); |
774 | |
780 | |
775 | =item $ev = $ctx->user_event |
781 | =item $ev = $ctx->user_event |
776 | |
782 | |
777 | Creates a new OpenCL::UserEvent object. |
783 | Creates a new OpenCL::UserEvent object. |
778 | |
784 | |
… | |
… | |
889 | for completion, unless the method is called in void context, in which case |
895 | for completion, unless the method is called in void context, in which case |
890 | no event object is created. |
896 | no event object is created. |
891 | |
897 | |
892 | They also allow you to specify any number of other event objects that this |
898 | They also allow you to specify any number of other event objects that this |
893 | request has to wait for before it starts executing, by simply passing the |
899 | request has to wait for before it starts executing, by simply passing the |
894 | event objects as extra parameters to the enqueue methods. |
900 | event objects as extra parameters to the enqueue methods. To simplify |
|
|
901 | program design, this module ignores any C<undef> values in the list of |
|
|
902 | events. This makes it possible to code operations such as this, without |
|
|
903 | having to put a valid event object into C<$event> first: |
|
|
904 | |
|
|
905 | $event = $queue->enqueue_xxx (..., $event); |
895 | |
906 | |
896 | Queues execute in-order by default, without any parallelism, so in most |
907 | Queues execute in-order by default, without any parallelism, so in most |
897 | cases (i.e. you use only one queue) it's not necessary to wait for or |
908 | cases (i.e. you use only one queue) it's not necessary to wait for or |
898 | create event objects. |
909 | create event objects, although an out-of-order queue is often a bit |
|
|
910 | faster. |
899 | |
911 | |
900 | =over 4 |
912 | =over 4 |
901 | |
913 | |
902 | =item $ev = $queue->enqueue_read_buffer ($buffer, $blocking, $offset, $len, $data, $wait_events...) |
914 | =item $ev = $queue->enqueue_read_buffer ($buffer, $blocking, $offset, $len, $data, $wait_events...) |
903 | |
915 | |
… | |
… | |
969 | reference to an array of local work sizes, with the same number of |
981 | reference to an array of local work sizes, with the same number of |
970 | elements as @$global_work_size. |
982 | elements as @$global_work_size. |
971 | |
983 | |
972 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueNDRangeKernel.html> |
984 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueNDRangeKernel.html> |
973 | |
985 | |
974 | =item $ev = $queue->enqueue_marker ($wait_events...) |
|
|
975 | |
|
|
976 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueMarker.html> |
|
|
977 | |
|
|
978 | =item $ev = $queue->enqueue_acquire_gl_objects ([object, ...], $wait_events...) |
986 | =item $ev = $queue->enqueue_acquire_gl_objects ([object, ...], $wait_events...) |
979 | |
987 | |
980 | Enqueues a list (an array-ref of OpenCL::Memory objects) to be acquired |
988 | Enqueues a list (an array-ref of OpenCL::Memory objects) to be acquired |
981 | for subsequent OpenCL usage. |
989 | for subsequent OpenCL usage. |
982 | |
990 | |
… | |
… | |
991 | |
999 | |
992 | =item $ev = $queue->enqueue_wait_for_events ($wait_events...) |
1000 | =item $ev = $queue->enqueue_wait_for_events ($wait_events...) |
993 | |
1001 | |
994 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWaitForEvents.html> |
1002 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWaitForEvents.html> |
995 | |
1003 | |
996 | =item $queue->enqueue_barrier |
1004 | =item $ev = $queue->enqueue_marker ($wait_events...) |
997 | |
1005 | |
|
|
1006 | L<http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueMarkerWithWaitList.html> |
|
|
1007 | |
|
|
1008 | =item $ev = $queue->enqueue_barrier ($wait_events...) |
|
|
1009 | |
998 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueBarrier.html> |
1010 | L<http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueBarrierWithWaitList.html> |
999 | |
1011 | |
1000 | =item $queue->flush |
1012 | =item $queue->flush |
1001 | |
1013 | |
1002 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clFlush.html> |
1014 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clFlush.html> |
1003 | |
1015 | |
… | |
… | |
1369 | |
1381 | |
1370 | This is a family of methods to set the kernel argument with the number C<$index> to the given C<$value>. |
1382 | This is a family of methods to set the kernel argument with the number C<$index> to the given C<$value>. |
1371 | |
1383 | |
1372 | TYPE is one of C<char>, C<uchar>, C<short>, C<ushort>, C<int>, C<uint>, |
1384 | TYPE is one of C<char>, C<uchar>, C<short>, C<ushort>, C<int>, C<uint>, |
1373 | C<long>, C<ulong>, C<half>, C<float>, C<double>, C<memory>, C<buffer>, |
1385 | C<long>, C<ulong>, C<half>, C<float>, C<double>, C<memory>, C<buffer>, |
1374 | C<image2d>, C<image3d>, C<sampler> or C<event>. |
1386 | C<image2d>, C<image3d>, C<sampler>, C<local> or C<event>. |
1375 | |
1387 | |
1376 | Chars and integers (including the half type) are specified as integers, |
1388 | Chars and integers (including the half type) are specified as integers, |
1377 | float and double as floating point values, memory/buffer/image2d/image3d |
1389 | float and double as floating point values, memory/buffer/image2d/image3d |
1378 | must be an object of that type or C<undef>, and sampler and event must be |
1390 | must be an object of that type or C<undef>, local-memory arguments are |
1379 | objects of that type. |
1391 | set by specifying the size, and sampler and event must be objects of that |
|
|
1392 | type. |
1380 | |
1393 | |
1381 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clSetKernelArg.html> |
1394 | L<http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clSetKernelArg.html> |
1382 | |
1395 | |
1383 | =back |
1396 | =back |
1384 | |
1397 | |
… | |
… | |
1473 | package OpenCL; |
1486 | package OpenCL; |
1474 | |
1487 | |
1475 | use common::sense; |
1488 | use common::sense; |
1476 | |
1489 | |
1477 | BEGIN { |
1490 | BEGIN { |
1478 | our $VERSION = '0.95'; |
1491 | our $VERSION = '0.96'; |
1479 | |
1492 | |
1480 | require XSLoader; |
1493 | require XSLoader; |
1481 | XSLoader::load (__PACKAGE__, $VERSION); |
1494 | XSLoader::load (__PACKAGE__, $VERSION); |
1482 | |
1495 | |
1483 | @OpenCL::Buffer::ISA = |
1496 | @OpenCL::Buffer::ISA = |