ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/OpenCL/OpenCL.xs
(Generate patch)

Comparing OpenCL/OpenCL.xs (file contents):
Revision 1.63 by root, Tue May 1 22:25:13 2012 UTC vs.
Revision 1.64 by root, Thu May 3 23:30:08 2012 UTC

1#include "EXTERN.h" 1#include "EXTERN.h"
2#include "perl.h" 2#include "perl.h"
3#include "XSUB.h" 3#include "XSUB.h"
4
5#include "ecb.h"//D
4 6
5#define X_STACKSIZE sizeof (void *) * 512 * 1024 // 2-4mb should be enough, really 7#define X_STACKSIZE sizeof (void *) * 512 * 1024 // 2-4mb should be enough, really
6#include "xthread.h" 8#include "xthread.h"
7#include "schmorp.h" 9#include "schmorp.h"
8 10
44typedef cl_command_queue OpenCL__Queue; 46typedef cl_command_queue OpenCL__Queue;
45typedef cl_mem OpenCL__Memory; 47typedef cl_mem OpenCL__Memory;
46typedef cl_mem OpenCL__Buffer; 48typedef cl_mem OpenCL__Buffer;
47typedef cl_mem OpenCL__BufferObj; 49typedef cl_mem OpenCL__BufferObj;
48typedef cl_mem OpenCL__Image; 50typedef cl_mem OpenCL__Image;
49typedef cl_mem OpenCL__Image2D;
50typedef cl_mem OpenCL__Image3D;
51typedef cl_mem OpenCL__Memory_ornull; 51typedef cl_mem OpenCL__Memory_ornull;
52typedef cl_mem OpenCL__Buffer_ornull; 52typedef cl_mem OpenCL__Buffer_ornull;
53typedef cl_mem OpenCL__Image_ornull; 53typedef cl_mem OpenCL__Image_ornull;
54typedef cl_mem OpenCL__Image2D_ornull;
55typedef cl_mem OpenCL__Image3D_ornull;
56typedef cl_sampler OpenCL__Sampler; 54typedef cl_sampler OpenCL__Sampler;
57typedef cl_program OpenCL__Program; 55typedef cl_program OpenCL__Program;
58typedef cl_kernel OpenCL__Kernel; 56typedef cl_kernel OpenCL__Kernel;
59typedef cl_event OpenCL__Event; 57typedef cl_event OpenCL__Event;
60typedef cl_event OpenCL__UserEvent; 58typedef cl_event OpenCL__UserEvent;
133} 131}
134 132
135/*****************************************************************************/ 133/*****************************************************************************/
136 134
137/* up to two temporary buffers */ 135/* up to two temporary buffers */
138static void * 136static void * ecb_noinline
139tmpbuf (size_t size) 137tmpbuf (size_t size)
140{ 138{
141 enum { buffers = 3 }; 139 enum { buffers = 4 };
142 static int idx; 140 static int idx;
143 static void *buf [buffers]; 141 static void *buf [buffers];
144 static size_t len [buffers]; 142 static size_t len [buffers];
145 143
146 idx = (idx + 1) % buffers; 144 idx = (idx + 1) % buffers;
219 if (res) \ 217 if (res) \
220 FAIL (name); 218 FAIL (name);
221 219
222/*****************************************************************************/ 220/*****************************************************************************/
223 221
224static cl_context_properties * 222static cl_context_properties * ecb_noinline
225SvCONTEXTPROPERTIES (const char *func, const char *svname, SV *sv, cl_context_properties *extra, int extracount) 223SvCONTEXTPROPERTIES (const char *func, const char *svname, SV *sv, cl_context_properties *extra, int extracount)
226{ 224{
227 if (!sv || !SvOK (sv)) 225 if (!sv || !SvOK (sv))
228 if (extra) 226 if (extra)
229 sv = sv_2mortal (newRV_noinc ((SV *)newAV ())); // slow, but rarely used hopefully 227 sv = sv_2mortal (newRV_noinc ((SV *)newAV ())); // slow, but rarely used hopefully
297#define PUSH_CLOBJ(stash,id) PUSHs (new_clobj ((stash), (IV)(id))) 295#define PUSH_CLOBJ(stash,id) PUSHs (new_clobj ((stash), (IV)(id)))
298#define XPUSH_CLOBJ(stash,id) XPUSHs (new_clobj ((stash), (IV)(id))) 296#define XPUSH_CLOBJ(stash,id) XPUSHs (new_clobj ((stash), (IV)(id)))
299 297
300/* cl objects are either \$iv, or [$iv, ...] */ 298/* cl objects are either \$iv, or [$iv, ...] */
301/* they can be upgraded at runtime to the array form */ 299/* they can be upgraded at runtime to the array form */
302static void * 300static void * ecb_noinline
303SvCLOBJ (const char *func, const char *svname, SV *sv, const char *pkg) 301SvCLOBJ (const char *func, const char *svname, SV *sv, const char *pkg)
304{ 302{
303 // sv_derived_from is quite slow :(
305 if (SvROK (sv) && sv_derived_from (sv, pkg)) 304 if (SvROK (sv) && sv_derived_from (sv, pkg))
306 return (void *)SvIV (SvRV (sv)); 305 return (void *)SvIV (SvRV (sv));
307 306
308 croak ("%s: %s is not of type %s", func, svname, pkg); 307 croak ("%s: %s is not of type %s", func, svname, pkg);
308}
309
310// the "no-inherit" version of the above
311static void * ecb_noinline
312SvCLOBJ_ni (const char *func, const char *svname, SV *sv, HV *stash)
313{
314 if (SvROK (sv) && SvSTASH (SvRV (sv)) == stash)
315 return (void *)SvIV (SvRV (sv));
316
317 croak ("%s: %s is not of type %s", func, svname, HvNAME (stash));
309} 318}
310 319
311/*****************************************************************************/ 320/*****************************************************************************/
312/* callback stuff */ 321/* callback stuff */
313 322
335static void (*eq_signal_func)(void *signal_arg, int value); 344static void (*eq_signal_func)(void *signal_arg, int value);
336static void *eq_signal_arg; 345static void *eq_signal_arg;
337static xmutex_t eq_lock = X_MUTEX_INIT; 346static xmutex_t eq_lock = X_MUTEX_INIT;
338static eq_item *eq_head, *eq_tail; 347static eq_item *eq_head, *eq_tail;
339 348
340static void 349static void ecb_noinline
341eq_enq (eq_vtbl *vtbl, SV *cb, void *data1, void *data2, void *data3) 350eq_enq (eq_vtbl *vtbl, SV *cb, void *data1, void *data2, void *data3)
342{ 351{
343 eq_item *item = malloc (sizeof (eq_item)); 352 eq_item *item = malloc (sizeof (eq_item));
344 353
345 item->next = 0; 354 item->next = 0;
412} 421}
413 422
414/*****************************************************************************/ 423/*****************************************************************************/
415/* context notify */ 424/* context notify */
416 425
417static void 426static void ecb_noinline
418eq_context_push (void *data1, void *data2, void *data3) 427eq_context_push (void *data1, void *data2, void *data3)
419{ 428{
420 dSP; 429 dSP;
421 PUSHs (sv_2mortal (newSVpv (data1, 0))); 430 PUSHs (sv_2mortal (newSVpv (data1, 0)));
422 PUSHs (sv_2mortal (newSVpvn (data2, (STRLEN)data3))); 431 PUSHs (sv_2mortal (newSVpvn (data2, (STRLEN)data3)));
444 { \ 453 { \
445 pfn_notify = eq_context_notify; \ 454 pfn_notify = eq_context_notify; \
446 user_data = s_get_cv (notify); \ 455 user_data = s_get_cv (notify); \
447 } 456 }
448 457
449static SV * 458static SV * ecb_noinline
450new_clobj_context (cl_context ctx, void *user_data) 459new_clobj_context (cl_context ctx, void *user_data)
451{ 460{
452 SV *sv = new_clobj (stash_context, (IV)ctx); 461 SV *sv = new_clobj (stash_context, (IV)ctx);
453 462
454 if (user_data) 463 if (user_data)
497 else 506 else
498 clReleaseProgram (arg->program); 507 clReleaseProgram (arg->program);
499 508
500 free (arg->options); 509 free (arg->options);
501 free (arg); 510 free (arg);
511
512 return 0;
502} 513}
503 514
504static void 515static void
505build_program_async (cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void *user_data) 516build_program_async (cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void *user_data)
506{ 517{
546 size_t res; 557 size_t res;
547 clGetImageInfo (img, CL_IMAGE_ROW_PITCH, sizeof (res), &res, 0); 558 clGetImageInfo (img, CL_IMAGE_ROW_PITCH, sizeof (res), &res, 0);
548 return res; 559 return res;
549} 560}
550 561
551static cl_event * 562static cl_event * ecb_noinline
552event_list (SV **items, cl_uint *rcount, cl_event extra) 563event_list (SV **items, cl_uint *rcount, cl_event extra)
553{ 564{
554 cl_uint count = *rcount; 565 cl_uint count = *rcount;
555 566
556 if (count > 0x7fffffff) // yeha, it's a hack - the caller might have underflowed 567 if (count > 0x7fffffffU) // yeah, it's a hack - the caller might have underflowed
557 count = 0; 568 *rcount = count = 0;
558 569
559 if (!count && !extra) 570 if (!count && !extra)
560 return 0; 571 return 0;
561 572
562 cl_event *list = tmpbuf (sizeof (cl_event) * (count + 1)); 573 cl_event *list = tmpbuf (sizeof (cl_event) * (count + 1));
775 786
776void 787void
777context_from_type (cl_context_properties *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef) 788context_from_type (cl_context_properties *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef)
778 PPCODE: 789 PPCODE:
779 CONTEXT_NOTIFY_CALLBACK; 790 CONTEXT_NOTIFY_CALLBACK;
780 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (properties, type, 0, 0, &res)); 791 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (properties, type, pfn_notify, user_data, &res));
781 XPUSH_CLOBJ_CONTEXT; 792 XPUSH_CLOBJ_CONTEXT;
782 793
783void 794void
784context (FUTURE properties, FUTURE devices, FUTURE notify) 795context (FUTURE properties, FUTURE devices, FUTURE notify)
785 PPCODE: 796 PPCODE:
842 EXTEND (SP, count); 853 EXTEND (SP, count);
843 for (i = 0; i < count; ++i) 854 for (i = 0; i < count; ++i)
844 PUSH_CLOBJ (stash_device, list [i]); 855 PUSH_CLOBJ (stash_device, list [i]);
845 856
846void 857void
847context (OpenCL::Platform self, cl_context_properties *properties, SV *devices, SV *notify = &PL_sv_undef) 858context (OpenCL::Platform self, SV *properties = 0, SV *devices, SV *notify = &PL_sv_undef)
848 PPCODE: 859 PPCODE:
849 if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV) 860 if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV)
850 croak ("OpenCL::Platform::context argument 'device' must be an arrayref with device objects, in call"); 861 croak ("OpenCL::Platform::context argument 'device' must be an arrayref with device objects, in call");
862
863 cl_context_properties extra[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self };
864 cl_context_properties *props = SvCONTEXTPROPERTIES ("OpenCL::Platform::context", "properties", properties, extra, 2);
851 865
852 AV *av = (AV *)SvRV (devices); 866 AV *av = (AV *)SvRV (devices);
853 cl_uint num_devices = av_len (av) + 1; 867 cl_uint num_devices = av_len (av) + 1;
854 cl_device_id *device_list = tmpbuf (sizeof (cl_device_id) * num_devices); 868 cl_device_id *device_list = tmpbuf (sizeof (cl_device_id) * num_devices);
855 869
856 int i; 870 int i;
857 for (i = num_devices; i--; ) 871 for (i = num_devices; i--; )
858 device_list [i] = SvCLOBJ ("clCreateContext", "devices", *av_fetch (av, i, 0), "OpenCL::Device"); 872 device_list [i] = SvCLOBJ ("clCreateContext", "devices", *av_fetch (av, i, 0), "OpenCL::Device");
859 873
860 CONTEXT_NOTIFY_CALLBACK; 874 CONTEXT_NOTIFY_CALLBACK;
861 NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (properties, num_devices, device_list, pfn_notify, user_data, &res)); 875 NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (props, num_devices, device_list, pfn_notify, user_data, &res));
862 XPUSH_CLOBJ_CONTEXT; 876 XPUSH_CLOBJ_CONTEXT;
863 877
864void 878void
865context_from_type (OpenCL::Platform self, SV *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef) 879context_from_type (OpenCL::Platform self, SV *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef)
866 PPCODE: 880 PPCODE:
867 cl_context_properties extra[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self }; 881 cl_context_properties extra[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self };
868 cl_context_properties *props = SvCONTEXTPROPERTIES ("OpenCL::Platform::context_from_type", "properties", properties, extra, 2); 882 cl_context_properties *props = SvCONTEXTPROPERTIES ("OpenCL::Platform::context_from_type", "properties", properties, extra, 2);
869 883
870 CONTEXT_NOTIFY_CALLBACK; 884 CONTEXT_NOTIFY_CALLBACK;
871 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (props, type, 0, 0, &res)); 885 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (props, type, pfn_notify, user_data, &res));
872 XPUSH_CLOBJ_CONTEXT; 886 XPUSH_CLOBJ_CONTEXT;
873 887
874MODULE = OpenCL PACKAGE = OpenCL::Device 888MODULE = OpenCL PACKAGE = OpenCL::Device
875 889
876void 890void
1299 1313
1300 len2 = len; 1314 len2 = len;
1301 NEED_SUCCESS_ARG (cl_program prog, CreateProgramWithSource, (self, 1, &ptr, &len2, &res)); 1315 NEED_SUCCESS_ARG (cl_program prog, CreateProgramWithSource, (self, 1, &ptr, &len2, &res));
1302 XPUSH_CLOBJ (stash_program, prog); 1316 XPUSH_CLOBJ (stash_program, prog);
1303 1317
1318void
1319program_with_binary (OpenCL::Context self, SV *devices, SV *binaries)
1320 PPCODE:
1321 if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV)
1322 croak ("OpenCL::Context::program_with_binary: devices must be specified as reference to an array of device objects");
1323
1324 devices = SvRV (devices);
1325
1326 if (!SvROK (binaries) || SvTYPE (SvRV (binaries)) != SVt_PVAV)
1327 croak ("OpenCL::Context::program_with_binary: binaries must be specified as reference to an array of strings");
1328
1329 binaries = SvRV (binaries);
1330
1331 if (av_len ((AV *)devices) != av_len ((AV *)binaries))
1332 croak ("OpenCL::Context::program_with_binary: differing numbers of devices and binaries are not allowed");
1333
1334 int count = av_len ((AV *)devices) + 1;
1335 cl_device_id *device_list = tmpbuf (sizeof (*device_list) * count);
1336 size_t *length_list = tmpbuf (sizeof (*length_list) * count);
1337 const unsigned char **binary_list = tmpbuf (sizeof (*binary_list) * count);
1338 cl_int *status_list = tmpbuf (sizeof (*status_list) * count);
1339
1340 int i;
1341 for (i = 0; i < count; ++i)
1342 {
1343 device_list [i] = SvCLOBJ ("OpenCL::Context::program_with_binary", "devices", *av_fetch ((AV *)devices, i, 0), "OpenCL::Device");
1344 STRLEN len;
1345 binary_list [i] = (const unsigned char *)SvPVbyte (*av_fetch ((AV *)binaries, i, 0), len);
1346 length_list [i] = len;
1347 }
1348
1349 NEED_SUCCESS_ARG (cl_program prog, CreateProgramWithBinary, (self, count, device_list, length_list, binary_list, GIMME_V == G_ARRAY ? status_list : 0, &res));
1350
1351 EXTEND (SP, 2);
1352 PUSH_CLOBJ (stash_program, prog);
1353
1354 if (GIMME_V == G_ARRAY)
1355 {
1356 AV *av = newAV ();
1357 PUSHs (sv_2mortal (newRV_noinc ((SV *)av)));
1358
1359 for (i = count; i--; )
1360 av_store (av, i, newSViv (status_list [i]));
1361 }
1362
1304#BEGIN:context 1363#BEGIN:context
1305 1364
1306void 1365void
1307reference_count (OpenCL::Context self) 1366reference_count (OpenCL::Context self)
1308 ALIAS: 1367 ALIAS:
1403fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...) 1462fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...)
1404 ALIAS: 1463 ALIAS:
1405 enqueue_fill_image = 0 1464 enqueue_fill_image = 0
1406 PPCODE: 1465 PPCODE:
1407 cl_event ev = 0; 1466 cl_event ev = 0;
1408 STRLEN len;
1409 const size_t origin [3] = { x, y, z }; 1467 const size_t origin [3] = { x, y, z };
1410 const size_t region [3] = { width, height, depth }; 1468 const size_t region [3] = { width, height, depth };
1411 EVENT_LIST (12); 1469 EVENT_LIST (12);
1412 1470
1413 const cl_float c_f [4] = { r, g, b, a }; 1471 const cl_float c_f [4] = { r, g, b, a };
1627 1685
1628 if (ev) 1686 if (ev)
1629 XPUSH_CLOBJ (stash_event, ev); 1687 XPUSH_CLOBJ (stash_event, ev);
1630 1688
1631void 1689void
1632map_buffer (OpenCL::Queue self, OpenCL::Buffer buf, cl_bool blocking = 1, cl_map_flags map_flags = CL_MAP_READ | CL_MAP_WRITE, size_t offset = 0, size_t cb = 0, ...) 1690map_buffer (OpenCL::Queue self, OpenCL::Buffer buf, cl_bool blocking = 1, cl_map_flags map_flags = CL_MAP_READ | CL_MAP_WRITE, size_t offset = 0, SV *cb_ = &PL_sv_undef, ...)
1633 ALIAS: 1691 ALIAS:
1634 enqueue_map_buffer = 0 1692 enqueue_map_buffer = 0
1635 PPCODE: 1693 PPCODE:
1636 cl_event ev; 1694 cl_event ev;
1637 EVENT_LIST (6); 1695 EVENT_LIST (6);
1696 size_t cb = SvIV (cb_);
1697
1698 if (!SvOK (cb_))
1699 {
1700 NEED_SUCCESS (GetMemObjectInfo, (buf, CL_MEM_SIZE, sizeof (cb), &cb, 0));
1701 cb -= offset;
1702 }
1638 1703
1639 NEED_SUCCESS_ARG (void *ptr, EnqueueMapBuffer, (self, buf, blocking, map_flags, offset, cb, event_list_count, event_list_ptr, &ev, &res)); 1704 NEED_SUCCESS_ARG (void *ptr, EnqueueMapBuffer, (self, buf, blocking, map_flags, offset, cb, event_list_count, event_list_ptr, &ev, &res));
1640 XPUSHs (mapped_new (stash_mappedbuffer, self, buf, map_flags, ptr, cb, ev, 0, 0)); 1705 XPUSHs (mapped_new (stash_mappedbuffer, self, buf, map_flags, ptr, cb, ev, 0, 0));
1641 1706
1642void 1707void
1643map_image (OpenCL::Queue self, OpenCL::Image img, cl_bool blocking = 1, cl_map_flags map_flags = CL_MAP_READ | CL_MAP_WRITE, size_t x = 0, size_t y = 0, size_t z = 0, size_t width = 0, size_t height = 0, size_t depth = 0, ...) 1708map_image (OpenCL::Queue self, OpenCL::Image img, cl_bool blocking = 1, cl_map_flags map_flags = CL_MAP_READ | CL_MAP_WRITE, size_t x = 0, size_t y = 0, size_t z = 0, SV *width_ = &PL_sv_undef, SV *height_ = &PL_sv_undef, SV *depth_ = &PL_sv_undef, ...)
1644 ALIAS: 1709 ALIAS:
1645 enqueue_map_image = 0 1710 enqueue_map_image = 0
1646 PPCODE: 1711 PPCODE:
1647 cl_event ev; 1712 size_t width = SvIV (width_);
1713 if (!SvOK (width_))
1714 {
1715 NEED_SUCCESS (GetImageInfo, (img, CL_IMAGE_WIDTH, sizeof (width), &width, 0));
1716 width -= x;
1717 }
1718
1719 size_t height = SvIV (width_);
1720 if (!SvOK (height_))
1721 {
1722 NEED_SUCCESS (GetImageInfo, (img, CL_IMAGE_HEIGHT, sizeof (height), &height, 0));
1723 height -= y;
1724 }
1725
1726 size_t depth = SvIV (width_);
1727 if (!SvOK (depth_))
1728 {
1729 NEED_SUCCESS (GetImageInfo, (img, CL_IMAGE_DEPTH, sizeof (depth), &depth, 0));
1730 depth -= z;
1731
1732 // stupid opencl returns 0 for depth, but requires 1 for 2d images
1733 if (!depth)
1734 depth = 1;
1735 }
1736
1648 const size_t origin[3] = { x, y, z }; 1737 const size_t origin[3] = { x, y, z };
1649 const size_t region[3] = { width, height, depth }; 1738 const size_t region[3] = { width, height, depth };
1650 size_t row_pitch, slice_pitch; 1739 size_t row_pitch, slice_pitch;
1651 EVENT_LIST (10); 1740 EVENT_LIST (10);
1652 1741
1742 cl_event ev;
1653 NEED_SUCCESS_ARG (void *ptr, EnqueueMapImage, (self, img, blocking, map_flags, origin, region, &row_pitch, &slice_pitch, event_list_count, event_list_ptr, &ev, &res)); 1743 NEED_SUCCESS_ARG (void *ptr, EnqueueMapImage, (self, img, blocking, map_flags, origin, region, &row_pitch, &slice_pitch, event_list_count, event_list_ptr, &ev, &res));
1654 1744
1655 size_t cb = slice_pitch ? slice_pitch * region [2] 1745 size_t cb = slice_pitch ? slice_pitch * region [2]
1656 : row_pitch ? row_pitch * region [1] 1746 : row_pitch ? row_pitch * region [1]
1657 : region [0]; 1747 : region [0];
2776set (OpenCL::Mapped self, size_t offset, SV *data) 2866set (OpenCL::Mapped self, size_t offset, SV *data)
2777 CODE: 2867 CODE:
2778 STRLEN len; 2868 STRLEN len;
2779 const char *ptr = SvPVbyte (data, len); 2869 const char *ptr = SvPVbyte (data, len);
2780 2870
2781 if (offset < 0 || offset + len > self->cb) 2871 if (offset + len > self->cb)
2782 croak ("OpenCL::Mapped::set out of bound condition detected"); 2872 croak ("OpenCL::Mapped::set out of bound condition detected");
2783 2873
2784 memcpy (offset + (char *)self->ptr, ptr, len); 2874 memcpy (offset + (char *)self->ptr, ptr, len);
2785 2875
2786MODULE = OpenCL PACKAGE = OpenCL::MappedBuffer 2876MODULE = OpenCL PACKAGE = OpenCL::MappedBuffer

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines