ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/OpenCL/OpenCL.xs
(Generate patch)

Comparing OpenCL/OpenCL.xs (file contents):
Revision 1.51 by root, Tue Apr 24 22:45:38 2012 UTC vs.
Revision 1.59 by root, Sun Apr 29 19:38:05 2012 UTC

270/* they can be upgraded at runtime to the array form */ 270/* they can be upgraded at runtime to the array form */
271static void * 271static void *
272SvCLOBJ (const char *func, const char *svname, SV *sv, const char *pkg) 272SvCLOBJ (const char *func, const char *svname, SV *sv, const char *pkg)
273{ 273{
274 if (SvROK (sv) && sv_derived_from (sv, pkg)) 274 if (SvROK (sv) && sv_derived_from (sv, pkg))
275 {
276 SV *rv = SvRV (sv);
277
278 if (SvTYPE (rv) == SVt_PVAV)
279 rv = AvARRAY (rv)[0];
280
281 return (void *)SvIV (SvRV (sv)); 275 return (void *)SvIV (SvRV (sv));
282 }
283 276
284 croak ("%s: %s is not of type %s", func, svname, pkg); 277 croak ("%s: %s is not of type %s", func, svname, pkg);
285}
286
287static void
288CLOBJ_push (SV *self, SV *data)
289{
290 SV *rv = SvRV (self);
291
292 if (SvTYPE (rv) != SVt_PVAV)
293 {
294 AV *av = newAV ();
295 av_push (av, rv);
296 rv = (SV *)av;
297 SvRV_set (self, rv);
298 }
299
300 av_push ((AV *)rv, data);
301}
302
303static SV *
304sv_struct (STRLEN size)
305{
306 SV *sv = newSV (size);
307 SvPOK_only (sv);
308 return sv;
309}
310
311static void *
312CLOBJ_push_struct (SV *self, STRLEN size)
313{
314 SV *sv = sv_struct (size);
315 CLOBJ_push (self, sv);
316 return SvPVX (sv);
317} 278}
318 279
319/*****************************************************************************/ 280/*****************************************************************************/
320/* callback stuff */ 281/* callback stuff */
321 282
347 308
348static void 309static void
349eq_enq (eq_vtbl *vtbl, SV *cb, void *data1, void *data2, void *data3) 310eq_enq (eq_vtbl *vtbl, SV *cb, void *data1, void *data2, void *data3)
350{ 311{
351 eq_item *item = malloc (sizeof (eq_item)); 312 eq_item *item = malloc (sizeof (eq_item));
352
353 printf ("enq(%p,%p,%p,%p,%p)\n", vtbl, cb, data1, data2, data3);//D
354 313
355 item->next = 0; 314 item->next = 0;
356 item->vtbl = vtbl; 315 item->vtbl = vtbl;
357 item->cb = cb; 316 item->cb = cb;
358 item->data1 = data1; 317 item->data1 = data1;
383 X_UNLOCK (eq_lock); 342 X_UNLOCK (eq_lock);
384 343
385 return res; 344 return res;
386} 345}
387 346
388#if 0
389static void
390mem_free (pTHX_ void *p)
391{
392 free (p);
393}
394//SAVEDESTRUCTOR_X (mem_free, item);
395#endif
396
397static void 347static void
398eq_poll (void) 348eq_poll (void)
399{ 349{
400 eq_item *item; 350 eq_item *item;
401 351
428eq_poll_interrupt (pTHX_ void *c_arg, int value) 378eq_poll_interrupt (pTHX_ void *c_arg, int value)
429{ 379{
430 eq_poll (); 380 eq_poll ();
431} 381}
432 382
383/*****************************************************************************/
433/* context notify */ 384/* context notify */
434 385
435static void 386static void
436eq_context_push (void *data1, void *data2, void *data3) 387eq_context_push (void *data1, void *data2, void *data3)
437{ 388{
438 dSP; 389 dSP;
439 PUSHs (sv_2mortal (newSVpv (data1, 0))); 390 PUSHs (sv_2mortal (newSVpv (data1, 0)));
440 PUSHs (sv_2mortal (newSVpvn (data2, (STRLEN)data3))); 391 PUSHs (sv_2mortal (newSVpvn (data2, (STRLEN)data3)));
441 PUTBACK; 392 PUTBACK;
393
394 free (data1);
395 free (data2);
442} 396}
443 397
444static eq_vtbl eq_context_vtbl = { 0, eq_context_push }; 398static eq_vtbl eq_context_vtbl = { 0, eq_context_push };
445 399
400static void CL_CALLBACK
401eq_context_notify (const char *msg, const void *pvt, size_t cb, void *user_data)
402{
403 void *pvt_copy = malloc (cb);
404 memcpy (pvt_copy, pvt, cb);
405 eq_enq (&eq_context_vtbl, user_data, strdup (msg), pvt_copy, (void *)cb);
406}
407
408#define CONTEXT_NOTIFY_CALLBACK \
409 void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) = context_default_notify; \
410 void *user_data = 0; \
411 \
412 if (SvOK (notify)) \
413 { \
414 pfn_notify = eq_context_notify; \
415 user_data = s_get_cv (notify); \
416 }
417
418static SV *
419new_clobj_context (cl_context ctx, void *user_data)
420{
421 SV *sv = NEW_CLOBJ ("OpenCL::Context", ctx);
422
423 if (user_data)
424 sv_magicext (SvRV (sv), user_data, PERL_MAGIC_ext, 0, 0, 0);
425
426 return sv;
427}
428
429#define XPUSH_CLOBJ_CONTEXT XPUSHs (new_clobj_context (ctx, user_data));
430
431/*****************************************************************************/
446/* build/compile/link notify */ 432/* build/compile/link notify */
447 433
448static void 434static void
449eq_program_push (void *data1, void *data2, void *data3) 435eq_program_push (void *data1, void *data2, void *data3)
450{ 436{
497 483
498 xthread_t id; 484 xthread_t id;
499 thread_create (&id, build_program_thread, arg); 485 thread_create (&id, build_program_thread, arg);
500} 486}
501 487
488/*****************************************************************************/
502/* event objects */ 489/* event objects */
503 490
504static void 491static void
505eq_event_push (void *data1, void *data2, void *data3) 492eq_event_push (void *data1, void *data2, void *data3)
506{ 493{
514 501
515static void CL_CALLBACK 502static void CL_CALLBACK
516eq_event_notify (cl_event event, cl_int event_command_exec_status, void *user_data) 503eq_event_notify (cl_event event, cl_int event_command_exec_status, void *user_data)
517{ 504{
518 clRetainEvent (event); 505 clRetainEvent (event);
519 eq_enq (&eq_event_vtbl, user_data, (void *)event, (void *)event_command_exec_status, 0); 506 eq_enq (&eq_event_vtbl, user_data, (void *)event, (void *)(IV)event_command_exec_status, 0);
520} 507}
521 508
522/*****************************************************************************/ 509/*****************************************************************************/
523 510
524static size_t 511static size_t
614 RETVAL = res; 601 RETVAL = res;
615 OUTPUT: 602 OUTPUT:
616 RETVAL 603 RETVAL
617 604
618const char * 605const char *
619err2str (cl_int err) 606err2str (cl_int err = res)
620 607
621const char * 608const char *
622enum2str (cl_uint value) 609enum2str (cl_uint value)
623 610
624void 611void
635 EXTEND (SP, count); 622 EXTEND (SP, count);
636 for (i = 0; i < count; ++i) 623 for (i = 0; i < count; ++i)
637 PUSH_CLOBJ ("OpenCL::Platform", list [i]); 624 PUSH_CLOBJ ("OpenCL::Platform", list [i]);
638 625
639void 626void
640context_from_type (cl_context_properties *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) 627context_from_type (cl_context_properties *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef)
641 PPCODE: 628 PPCODE:
629 CONTEXT_NOTIFY_CALLBACK;
642 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (properties, type, 0, 0, &res)); 630 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (properties, type, 0, 0, &res));
643 XPUSH_CLOBJ ("OpenCL::Context", ctx); 631 XPUSH_CLOBJ_CONTEXT;
644 632
645#if 0
646
647void 633void
648context (cl_context_properties *properties = 0, FUTURE devices, FUTURE notify = 0) 634context (FUTURE properties, FUTURE devices, FUTURE notify)
649 PPCODE: 635 PPCODE:
650 /* der Gipfel der Kunst */ 636 /* der Gipfel der Kunst */
651
652#endif
653 637
654void 638void
655wait_for_events (...) 639wait_for_events (...)
656 CODE: 640 CODE:
657 EVENT_LIST (0, items); 641 EVENT_LIST (0, items);
708 EXTEND (SP, count); 692 EXTEND (SP, count);
709 for (i = 0; i < count; ++i) 693 for (i = 0; i < count; ++i)
710 PUSHs (sv_setref_pv (sv_newmortal (), "OpenCL::Device", list [i])); 694 PUSHs (sv_setref_pv (sv_newmortal (), "OpenCL::Device", list [i]));
711 695
712void 696void
713context (OpenCL::Platform self, cl_context_properties *properties, SV *devices, SV *notify = 0) 697context (OpenCL::Platform self, cl_context_properties *properties, SV *devices, SV *notify = &PL_sv_undef)
714 PPCODE: 698 PPCODE:
715 if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV) 699 if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV)
716 croak ("OpenCL::Platform::context argument 'device' must be an arrayref with device objects, in call"); 700 croak ("OpenCL::Platform::context argument 'device' must be an arrayref with device objects, in call");
717 701
718 AV *av = (AV *)SvRV (devices); 702 AV *av = (AV *)SvRV (devices);
721 705
722 int i; 706 int i;
723 for (i = num_devices; i--; ) 707 for (i = num_devices; i--; )
724 device_list [i] = SvCLOBJ ("clCreateContext", "devices", *av_fetch (av, i, 0), "OpenCL::Device"); 708 device_list [i] = SvCLOBJ ("clCreateContext", "devices", *av_fetch (av, i, 0), "OpenCL::Device");
725 709
726 void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) = context_default_notify; 710 CONTEXT_NOTIFY_CALLBACK;
727 void *user_data = 0;
728
729 NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (properties, num_devices, device_list, pfn_notify, user_data, &res)); 711 NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (properties, num_devices, device_list, pfn_notify, user_data, &res));
730 XPUSH_CLOBJ ("OpenCL::Context", ctx); 712 XPUSH_CLOBJ_CONTEXT;
731 713
732void 714void
733context_from_type (OpenCL::Platform self, SV *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) 715context_from_type (OpenCL::Platform self, SV *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef)
734 PPCODE: 716 PPCODE:
735 cl_context_properties extra[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self }; 717 cl_context_properties extra[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self };
736 cl_context_properties *props = SvCONTEXTPROPERTIES ("OpenCL::Platform::context_from_type", "properties", properties, extra, 2); 718 cl_context_properties *props = SvCONTEXTPROPERTIES ("OpenCL::Platform::context_from_type", "properties", properties, extra, 2);
719
720 CONTEXT_NOTIFY_CALLBACK;
737 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (props, type, 0, 0, &res)); 721 NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (props, type, 0, 0, &res));
738 XPUSH_CLOBJ ("OpenCL::Context", ctx); 722 XPUSH_CLOBJ_CONTEXT;
739 723
740MODULE = OpenCL PACKAGE = OpenCL::Device 724MODULE = OpenCL PACKAGE = OpenCL::Device
741 725
742void 726void
743info (OpenCL::Device self, cl_device_info name) 727info (OpenCL::Device self, cl_device_info name)
1012 XPUSH_CLOBJ ("OpenCL::BufferObj", mem); 996 XPUSH_CLOBJ ("OpenCL::BufferObj", mem);
1013 997
1014#if CL_VERSION_1_2 998#if CL_VERSION_1_2
1015 999
1016void 1000void
1017image (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, cl_mem_object_type type, size_t width, size_t height, size_t depth, size_t array_size = 0, size_t row_pitch = 0, size_t slice_pitch = 0, cl_uint num_mip_level = 0, cl_uint num_samples = 0, SV *data = &PL_sv_undef) 1001image (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, cl_mem_object_type type, size_t width, size_t height, size_t depth = 0, size_t array_size = 0, size_t row_pitch = 0, size_t slice_pitch = 0, cl_uint num_mip_level = 0, cl_uint num_samples = 0, SV *data = &PL_sv_undef)
1018 PPCODE: 1002 PPCODE:
1019 STRLEN len; 1003 STRLEN len;
1020 char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0; 1004 char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0;
1021 const cl_image_format format = { channel_order, channel_type }; 1005 const cl_image_format format = { channel_order, channel_type };
1022 const cl_image_desc desc = { 1006 const cl_image_desc desc = {
1215DESTROY (OpenCL::Queue self) 1199DESTROY (OpenCL::Queue self)
1216 CODE: 1200 CODE:
1217 clReleaseCommandQueue (self); 1201 clReleaseCommandQueue (self);
1218 1202
1219void 1203void
1220enqueue_read_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...) 1204read_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...)
1205 ALIAS:
1206 enqueue_read_buffer = 0
1221 PPCODE: 1207 PPCODE:
1222 cl_event ev = 0; 1208 cl_event ev = 0;
1223 EVENT_LIST (6, items - 6); 1209 EVENT_LIST (6, items - 6);
1224 1210
1225 SvUPGRADE (data, SVt_PV); 1211 SvUPGRADE (data, SVt_PV);
1230 1216
1231 if (ev) 1217 if (ev)
1232 XPUSH_CLOBJ ("OpenCL::Event", ev); 1218 XPUSH_CLOBJ ("OpenCL::Event", ev);
1233 1219
1234void 1220void
1235enqueue_write_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...) 1221write_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...)
1222 ALIAS:
1223 enqueue_write_buffer = 0
1236 PPCODE: 1224 PPCODE:
1237 cl_event ev = 0; 1225 cl_event ev = 0;
1238 STRLEN len; 1226 STRLEN len;
1239 char *ptr = SvPVbyte (data, len); 1227 char *ptr = SvPVbyte (data, len);
1240 EVENT_LIST (5, items - 5); 1228 EVENT_LIST (5, items - 5);
1245 XPUSH_CLOBJ ("OpenCL::Event", ev); 1233 XPUSH_CLOBJ ("OpenCL::Event", ev);
1246 1234
1247#if CL_VERSION_1_2 1235#if CL_VERSION_1_2
1248 1236
1249void 1237void
1250enqueue_fill_buffer (OpenCL::Queue self, OpenCL::Buffer mem, SV *data, size_t offset, size_t size, ...) 1238fill_buffer (OpenCL::Queue self, OpenCL::Buffer mem, SV *data, size_t offset, size_t size, ...)
1239 ALIAS:
1240 enqueue_fill_buffer = 0
1251 PPCODE: 1241 PPCODE:
1252 cl_event ev = 0; 1242 cl_event ev = 0;
1253 STRLEN len; 1243 STRLEN len;
1254 char *ptr = SvPVbyte (data, len); 1244 char *ptr = SvPVbyte (data, len);
1255 EVENT_LIST (5, items - 5); 1245 EVENT_LIST (5, items - 5);
1258 1248
1259 if (ev) 1249 if (ev)
1260 XPUSH_CLOBJ ("OpenCL::Event", ev); 1250 XPUSH_CLOBJ ("OpenCL::Event", ev);
1261 1251
1262void 1252void
1263enqueue_fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...) 1253fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...)
1254 ALIAS:
1255 enqueue_fill_image = 0
1264 PPCODE: 1256 PPCODE:
1265 cl_event ev = 0; 1257 cl_event ev = 0;
1266 STRLEN len; 1258 STRLEN len;
1267 const size_t origin [3] = { x, y, z }; 1259 const size_t origin [3] = { x, y, z };
1268 const size_t region [3] = { width, height, depth }; 1260 const size_t region [3] = { width, height, depth };
1277 NEED_SUCCESS (GetImageInfo, (img, CL_IMAGE_FORMAT, sizeof (format), &format, 0)); 1269 NEED_SUCCESS (GetImageInfo, (img, CL_IMAGE_FORMAT, sizeof (format), &format, 0));
1278 assert (sizeof (fus) == CL_FLOAT + 1 - CL_SNORM_INT8); 1270 assert (sizeof (fus) == CL_FLOAT + 1 - CL_SNORM_INT8);
1279 if (format.image_channel_data_type < CL_SNORM_INT8 || CL_FLOAT < format.image_channel_data_type) 1271 if (format.image_channel_data_type < CL_SNORM_INT8 || CL_FLOAT < format.image_channel_data_type)
1280 croak ("enqueue_fill_image: image has unsupported channel type, only opencl 1.2 channel types supported."); 1272 croak ("enqueue_fill_image: image has unsupported channel type, only opencl 1.2 channel types supported.");
1281 1273
1282 NEED_SUCCESS (EnqueueFillImage, (self, img, c_fus [fus [format.image_channel_data_type]], 1274 NEED_SUCCESS (EnqueueFillImage, (self, img, c_fus [fus [format.image_channel_data_type - CL_SNORM_INT8]],
1283 origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1275 origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1284 1276
1285 if (ev) 1277 if (ev)
1286 XPUSH_CLOBJ ("OpenCL::Event", ev); 1278 XPUSH_CLOBJ ("OpenCL::Event", ev);
1287 1279
1288#endif 1280#endif
1289 1281
1290void 1282void
1291enqueue_copy_buffer (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...) 1283copy_buffer (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...)
1284 ALIAS:
1285 enqueue_copy_buffer = 0
1292 PPCODE: 1286 PPCODE:
1293 cl_event ev = 0; 1287 cl_event ev = 0;
1294 EVENT_LIST (6, items - 6); 1288 EVENT_LIST (6, items - 6);
1295 1289
1296 NEED_SUCCESS (EnqueueCopyBuffer, (self, src, dst, src_offset, dst_offset, len, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1290 NEED_SUCCESS (EnqueueCopyBuffer, (self, src, dst, src_offset, dst_offset, len, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1297 1291
1298 if (ev) 1292 if (ev)
1299 XPUSH_CLOBJ ("OpenCL::Event", ev); 1293 XPUSH_CLOBJ ("OpenCL::Event", ev);
1300 1294
1301void 1295void
1302enqueue_read_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) 1296read_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...)
1297 ALIAS:
1298 enqueue_read_buffer_rect = 0
1303 PPCODE: 1299 PPCODE:
1304 cl_event ev = 0; 1300 cl_event ev = 0;
1305 const size_t buf_origin [3] = { buf_x , buf_y , buf_z }; 1301 const size_t buf_origin [3] = { buf_x , buf_y , buf_z };
1306 const size_t host_origin[3] = { host_x, host_y, host_z }; 1302 const size_t host_origin[3] = { host_x, host_y, host_z };
1307 const size_t region[3] = { width, height, depth }; 1303 const size_t region[3] = { width, height, depth };
1329 1325
1330 if (ev) 1326 if (ev)
1331 XPUSH_CLOBJ ("OpenCL::Event", ev); 1327 XPUSH_CLOBJ ("OpenCL::Event", ev);
1332 1328
1333void 1329void
1334enqueue_write_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) 1330write_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...)
1331 ALIAS:
1332 enqueue_write_buffer_rect = 0
1335 PPCODE: 1333 PPCODE:
1336 cl_event ev = 0; 1334 cl_event ev = 0;
1337 const size_t buf_origin [3] = { buf_x , buf_y , buf_z }; 1335 const size_t buf_origin [3] = { buf_x , buf_y , buf_z };
1338 const size_t host_origin[3] = { host_x, host_y, host_z }; 1336 const size_t host_origin[3] = { host_x, host_y, host_z };
1339 const size_t region[3] = { width, height, depth }; 1337 const size_t region[3] = { width, height, depth };
1362 1360
1363 if (ev) 1361 if (ev)
1364 XPUSH_CLOBJ ("OpenCL::Event", ev); 1362 XPUSH_CLOBJ ("OpenCL::Event", ev);
1365 1363
1366void 1364void
1367enqueue_copy_buffer_rect (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...) 1365copy_buffer_rect (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...)
1366 ALIAS:
1367 enqueue_copy_buffer_rect = 0
1368 PPCODE: 1368 PPCODE:
1369 cl_event ev = 0; 1369 cl_event ev = 0;
1370 const size_t src_origin[3] = { src_x, src_y, src_z }; 1370 const size_t src_origin[3] = { src_x, src_y, src_z };
1371 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1371 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1372 const size_t region[3] = { width, height, depth }; 1372 const size_t region[3] = { width, height, depth };
1376 1376
1377 if (ev) 1377 if (ev)
1378 XPUSH_CLOBJ ("OpenCL::Event", ev); 1378 XPUSH_CLOBJ ("OpenCL::Event", ev);
1379 1379
1380void 1380void
1381enqueue_read_image (OpenCL::Queue self, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) 1381read_image (OpenCL::Queue self, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...)
1382 ALIAS:
1383 enqueue_read_image = 0
1382 PPCODE: 1384 PPCODE:
1383 cl_event ev = 0; 1385 cl_event ev = 0;
1384 const size_t src_origin[3] = { src_x, src_y, src_z }; 1386 const size_t src_origin[3] = { src_x, src_y, src_z };
1385 const size_t region[3] = { width, height, depth }; 1387 const size_t region[3] = { width, height, depth };
1386 EVENT_LIST (12, items - 12); 1388 EVENT_LIST (12, items - 12);
1401 1403
1402 if (ev) 1404 if (ev)
1403 XPUSH_CLOBJ ("OpenCL::Event", ev); 1405 XPUSH_CLOBJ ("OpenCL::Event", ev);
1404 1406
1405void 1407void
1406enqueue_write_image (OpenCL::Queue self, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) 1408write_image (OpenCL::Queue self, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...)
1409 ALIAS:
1410 enqueue_write_image = 0
1407 PPCODE: 1411 PPCODE:
1408 cl_event ev = 0; 1412 cl_event ev = 0;
1409 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1413 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1410 const size_t region[3] = { width, height, depth }; 1414 const size_t region[3] = { width, height, depth };
1411 STRLEN len; 1415 STRLEN len;
1427 1431
1428 if (ev) 1432 if (ev)
1429 XPUSH_CLOBJ ("OpenCL::Event", ev); 1433 XPUSH_CLOBJ ("OpenCL::Event", ev);
1430 1434
1431void 1435void
1432enqueue_copy_image (OpenCL::Queue self, OpenCL::Image src, OpenCL::Image dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) 1436copy_image (OpenCL::Queue self, OpenCL::Image src, OpenCL::Image dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...)
1437 ALIAS:
1438 enqueue_copy_image = 0
1433 PPCODE: 1439 PPCODE:
1434 cl_event ev = 0; 1440 cl_event ev = 0;
1435 const size_t src_origin[3] = { src_x, src_y, src_z }; 1441 const size_t src_origin[3] = { src_x, src_y, src_z };
1436 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1442 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1437 const size_t region[3] = { width, height, depth }; 1443 const size_t region[3] = { width, height, depth };
1441 1447
1442 if (ev) 1448 if (ev)
1443 XPUSH_CLOBJ ("OpenCL::Event", ev); 1449 XPUSH_CLOBJ ("OpenCL::Event", ev);
1444 1450
1445void 1451void
1446enqueue_copy_image_to_buffer (OpenCL::Queue self, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...) 1452copy_image_to_buffer (OpenCL::Queue self, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...)
1453 ALIAS:
1454 enqueue_copy_image_to_buffer = 0
1447 PPCODE: 1455 PPCODE:
1448 cl_event ev = 0; 1456 cl_event ev = 0;
1449 const size_t src_origin[3] = { src_x, src_y, src_z }; 1457 const size_t src_origin[3] = { src_x, src_y, src_z };
1450 const size_t region[3] = { width, height, depth }; 1458 const size_t region[3] = { width, height, depth };
1451 EVENT_LIST (10, items - 10); 1459 EVENT_LIST (10, items - 10);
1454 1462
1455 if (ev) 1463 if (ev)
1456 XPUSH_CLOBJ ("OpenCL::Event", ev); 1464 XPUSH_CLOBJ ("OpenCL::Event", ev);
1457 1465
1458void 1466void
1459enqueue_copy_buffer_to_image (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) 1467copy_buffer_to_image (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...)
1468 ALIAS:
1469 enqueue_copy_buffer_to_image = 0
1460 PPCODE: 1470 PPCODE:
1461 cl_event ev = 0; 1471 cl_event ev = 0;
1462 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1472 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1463 const size_t region[3] = { width, height, depth }; 1473 const size_t region[3] = { width, height, depth };
1464 EVENT_LIST (10, items - 10); 1474 EVENT_LIST (10, items - 10);
1467 1477
1468 if (ev) 1478 if (ev)
1469 XPUSH_CLOBJ ("OpenCL::Event", ev); 1479 XPUSH_CLOBJ ("OpenCL::Event", ev);
1470 1480
1471void 1481void
1472enqueue_task (OpenCL::Queue self, OpenCL::Kernel kernel, ...) 1482task (OpenCL::Queue self, OpenCL::Kernel kernel, ...)
1483 ALIAS:
1484 enqueue_task = 0
1473 PPCODE: 1485 PPCODE:
1474 cl_event ev = 0; 1486 cl_event ev = 0;
1475 EVENT_LIST (2, items - 2); 1487 EVENT_LIST (2, items - 2);
1476 1488
1477 NEED_SUCCESS (EnqueueTask, (self, kernel, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1489 NEED_SUCCESS (EnqueueTask, (self, kernel, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1478 1490
1479 if (ev) 1491 if (ev)
1480 XPUSH_CLOBJ ("OpenCL::Event", ev); 1492 XPUSH_CLOBJ ("OpenCL::Event", ev);
1481 1493
1482void 1494void
1483enqueue_nd_range_kernel (OpenCL::Queue self, OpenCL::Kernel kernel, SV *global_work_offset, SV *global_work_size, SV *local_work_size = &PL_sv_undef, ...) 1495nd_range_kernel (OpenCL::Queue self, OpenCL::Kernel kernel, SV *global_work_offset, SV *global_work_size, SV *local_work_size = &PL_sv_undef, ...)
1496 ALIAS:
1497 enqueue_nd_range_kernel = 0
1484 PPCODE: 1498 PPCODE:
1485 cl_event ev = 0; 1499 cl_event ev = 0;
1486 size_t *gwo = 0, *gws, *lws = 0; 1500 size_t *gwo = 0, *gws, *lws = 0;
1487 int gws_len; 1501 int gws_len;
1488 size_t *lists; 1502 size_t *lists;
1496 1510
1497 lists = tmpbuf (sizeof (size_t) * 3 * gws_len); 1511 lists = tmpbuf (sizeof (size_t) * 3 * gws_len);
1498 1512
1499 gws = lists + gws_len * 0; 1513 gws = lists + gws_len * 0;
1500 for (i = 0; i < gws_len; ++i) 1514 for (i = 0; i < gws_len; ++i)
1515 {
1501 gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); 1516 gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]);
1517 // at least nvidia crashes for 0-sized work group sizes, work around
1518 if (!gws [i])
1519 croak ("clEnqueueNDRangeKernel: global_work_size[%d] is zero, must be non-zero", i);
1520 }
1502 1521
1503 if (SvOK (global_work_offset)) 1522 if (SvOK (global_work_offset))
1504 { 1523 {
1505 if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) 1524 if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV)
1506 croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); 1525 croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference");
1514 } 1533 }
1515 1534
1516 if (SvOK (local_work_size)) 1535 if (SvOK (local_work_size))
1517 { 1536 {
1518 if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) 1537 if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV)
1519 croak ("clEnqueueNDRangeKernel: global_work_size must be undef or an array reference"); 1538 croak ("clEnqueueNDRangeKernel: local_work_size must be undef or an array reference");
1520 1539
1521 if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) 1540 if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len)
1522 croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); 1541 croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size");
1523 1542
1524 lws = lists + gws_len * 2; 1543 lws = lists + gws_len * 2;
1525 for (i = 0; i < gws_len; ++i) 1544 for (i = 0; i < gws_len; ++i)
1545 {
1526 lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); 1546 lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]);
1547 // at least nvidia crashes for 0-sized work group sizes, work around
1548 if (!lws [i])
1549 croak ("clEnqueueNDRangeKernel: local_work_size[%d] is zero, must be non-zero", i);
1550 }
1527 } 1551 }
1528 1552
1529 NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1553 NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1530 1554
1531 if (ev) 1555 if (ev)
1532 XPUSH_CLOBJ ("OpenCL::Event", ev); 1556 XPUSH_CLOBJ ("OpenCL::Event", ev);
1533 1557
1534#if cl_apple_gl_sharing || cl_khr_gl_sharing 1558#if cl_apple_gl_sharing || cl_khr_gl_sharing
1535 1559
1536void 1560void
1537enqueue_acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) 1561acquire_gl_objects (OpenCL::Queue self, SV *objects, ...)
1562 ALIAS:
1563 enqueue_acquire_gl_objects = 0
1538 ALIAS: 1564 ALIAS:
1539 enqueue_release_gl_objects = 1 1565 enqueue_release_gl_objects = 1
1540 PPCODE: 1566 PPCODE:
1541 if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV) 1567 if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV)
1542 croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call"); 1568 croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call");
1560 XPUSH_CLOBJ ("OpenCL::Event", ev); 1586 XPUSH_CLOBJ ("OpenCL::Event", ev);
1561 1587
1562#endif 1588#endif
1563 1589
1564void 1590void
1565enqueue_wait_for_events (OpenCL::Queue self, ...) 1591wait_for_events (OpenCL::Queue self, ...)
1592 ALIAS:
1593 enqueue_wait_for_events = 0
1566 CODE: 1594 CODE:
1567 EVENT_LIST (1, items - 1); 1595 EVENT_LIST (1, items - 1);
1568#if PREFER_1_1 1596#if PREFER_1_1
1569 NEED_SUCCESS (EnqueueWaitForEvents, (self, event_list_count, event_list_ptr)); 1597 NEED_SUCCESS (EnqueueWaitForEvents, (self, event_list_count, event_list_ptr));
1570#else 1598#else
1571 NEED_SUCCESS (EnqueueBarrierWithWaitList, (self, event_list_count, event_list_ptr, 0)); 1599 NEED_SUCCESS (EnqueueBarrierWithWaitList, (self, event_list_count, event_list_ptr, 0));
1572#endif 1600#endif
1573 1601
1574void 1602void
1575enqueue_marker (OpenCL::Queue self, ...) 1603marker (OpenCL::Queue self, ...)
1604 ALIAS:
1605 enqueue_marker = 0
1576 PPCODE: 1606 PPCODE:
1577 cl_event ev = 0; 1607 cl_event ev = 0;
1578 EVENT_LIST (1, items - 1); 1608 EVENT_LIST (1, items - 1);
1579#if PREFER_1_1 1609#if PREFER_1_1
1580 if (!event_list_count) 1610 if (!event_list_count)
1593#endif 1623#endif
1594 if (ev) 1624 if (ev)
1595 XPUSH_CLOBJ ("OpenCL::Event", ev); 1625 XPUSH_CLOBJ ("OpenCL::Event", ev);
1596 1626
1597void 1627void
1598enqueue_barrier (OpenCL::Queue self, ...) 1628barrier (OpenCL::Queue self, ...)
1629 ALIAS:
1630 enqueue_barrier = 0
1599 PPCODE: 1631 PPCODE:
1600 cl_event ev = 0; 1632 cl_event ev = 0;
1601 EVENT_LIST (1, items - 1); 1633 EVENT_LIST (1, items - 1);
1602#if PREFER_1_1 1634#if PREFER_1_1
1603 if (!event_list_count && GIMME_V == G_VOID) 1635 if (!event_list_count && GIMME_V == G_VOID)
2132DESTROY (OpenCL::Kernel self) 2164DESTROY (OpenCL::Kernel self)
2133 CODE: 2165 CODE:
2134 clReleaseKernel (self); 2166 clReleaseKernel (self);
2135 2167
2136void 2168void
2169setf (OpenCL::Kernel self, const char *format, ...)
2170 CODE:
2171 int i;
2172 for (i = 2; ; ++i)
2173 {
2174 while (*format == ' ')
2175 ++format;
2176
2177 char type = *format++;
2178
2179 if (!type)
2180 break;
2181
2182 if (i >= items)
2183 croak ("OpenCL::Kernel::setf format string too long (not enough arguments)");
2184
2185 SV *sv = ST (i);
2186
2187 union
2188 {
2189 cl_char cc; cl_uchar cC; cl_short cs; cl_ushort cS;
2190 cl_int ci; cl_uint cI; cl_long cl; cl_ulong cL;
2191 cl_half ch; cl_float cf; cl_double cd;
2192 cl_mem cm;
2193 cl_sampler ca;
2194 size_t cz;
2195 cl_event ce;
2196 } arg;
2197 size_t size;
2198 int nullarg = 0;
2199
2200 switch (type)
2201 {
2202 case 'c': arg.cc = SvIV (sv); size = sizeof (arg.cc); break;
2203 case 'C': arg.cC = SvUV (sv); size = sizeof (arg.cC); break;
2204 case 's': arg.cs = SvIV (sv); size = sizeof (arg.cs); break;
2205 case 'S': arg.cS = SvUV (sv); size = sizeof (arg.cS); break;
2206 case 'i': arg.ci = SvIV (sv); size = sizeof (arg.ci); break;
2207 case 'I': arg.cI = SvUV (sv); size = sizeof (arg.cI); break;
2208 case 'l': arg.cl = SvIV (sv); size = sizeof (arg.cl); break;
2209 case 'L': arg.cL = SvUV (sv); size = sizeof (arg.cL); break;
2210
2211 case 'h': arg.ch = SvUV (sv); size = sizeof (arg.ch); break;
2212 case 'f': arg.cf = SvNV (sv); size = sizeof (arg.cf); break;
2213 case 'd': arg.cd = SvNV (sv); size = sizeof (arg.cd); break;
2214
2215 case 'z': nullarg = 1; size = SvIV (sv); break;
2216
2217 case 'm': nullarg = !SvOK (sv); arg.cm = SvCLOBJ ("OpenCL::Kernel::setf", "m", sv, "OpenCL::Memory" ); size = sizeof (arg.cm); break;
2218 case 'a': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "a", sv, "OpenCL::Sampler"); size = sizeof (arg.ca); break;
2219 case 'e': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "e", sv, "OpenCL::Event" ); size = sizeof (arg.ce); break;
2220
2221 default:
2222 croak ("OpenCL::Kernel::setf format character '%c' not supported", type);
2223 }
2224
2225 res = clSetKernelArg (self, i - 2, size, nullarg ? 0 : &arg);
2226 if (res)
2227 croak ("OpenCL::Kernel::setf kernel parameter '%c' (#%d): %s", type, i - 2, err2str (res));
2228 }
2229
2230 if (i != items)
2231 croak ("OpenCL::Kernel::setf format string too short (too many arguments)");
2232
2233void
2137set_char (OpenCL::Kernel self, cl_uint idx, cl_char value) 2234set_char (OpenCL::Kernel self, cl_uint idx, cl_char value)
2138 CODE: 2235 CODE:
2139 clSetKernelArg (self, idx, sizeof (value), &value); 2236 clSetKernelArg (self, idx, sizeof (value), &value);
2140 2237
2141void 2238void
2189 clSetKernelArg (self, idx, sizeof (value), &value); 2286 clSetKernelArg (self, idx, sizeof (value), &value);
2190 2287
2191void 2288void
2192set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value) 2289set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value)
2193 CODE: 2290 CODE:
2194 clSetKernelArg (self, idx, sizeof (value), &value); 2291 clSetKernelArg (self, idx, sizeof (value), value ? &value : 0);
2195 2292
2196void 2293void
2197set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value) 2294set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value)
2198 CODE: 2295 CODE:
2199 clSetKernelArg (self, idx, sizeof (value), &value); 2296 clSetKernelArg (self, idx, sizeof (value), value ? &value : 0);
2200 2297
2201void 2298void
2202set_image2d (OpenCL::Kernel self, cl_uint idx, OpenCL::Image2D_ornull value) 2299set_image (OpenCL::Kernel self, cl_uint idx, OpenCL::Image_ornull value)
2300 ALIAS:
2301 set_image2d = 0
2302 set_image3d = 0
2203 CODE: 2303 CODE:
2204 clSetKernelArg (self, idx, sizeof (value), &value); 2304 clSetKernelArg (self, idx, sizeof (value), value ? &value : 0);
2205
2206void
2207set_image3d (OpenCL::Kernel self, cl_uint idx, OpenCL::Image3D_ornull value)
2208 CODE:
2209 clSetKernelArg (self, idx, sizeof (value), &value);
2210 2305
2211void 2306void
2212set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value) 2307set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value)
2213 CODE: 2308 CODE:
2214 clSetKernelArg (self, idx, sizeof (value), &value); 2309 clSetKernelArg (self, idx, sizeof (value), &value);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines