ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/OpenCL/OpenCL.xs
(Generate patch)

Comparing OpenCL/OpenCL.xs (file contents):
Revision 1.52 by root, Tue Apr 24 23:53:12 2012 UTC vs.
Revision 1.60 by root, Mon Apr 30 09:38:53 2012 UTC

270/* they can be upgraded at runtime to the array form */ 270/* they can be upgraded at runtime to the array form */
271static void * 271static void *
272SvCLOBJ (const char *func, const char *svname, SV *sv, const char *pkg) 272SvCLOBJ (const char *func, const char *svname, SV *sv, const char *pkg)
273{ 273{
274 if (SvROK (sv) && sv_derived_from (sv, pkg)) 274 if (SvROK (sv) && sv_derived_from (sv, pkg))
275 {
276 SV *rv = SvRV (sv);
277
278#if CLOBJ_PUSH
279 if (SvTYPE (rv) == SVt_PVAV)
280 rv = AvARRAY (rv)[0];
281#endif
282
283 return (void *)SvIV (rv); 275 return (void *)SvIV (SvRV (sv));
284 }
285 276
286 croak ("%s: %s is not of type %s", func, svname, pkg); 277 croak ("%s: %s is not of type %s", func, svname, pkg);
287} 278}
288
289#if CLOBJ_PUSH
290
291static void
292CLOBJ_push (SV *self, SV *data)
293{
294 SV *rv = SvRV (self);
295
296 if (SvTYPE (rv) != SVt_PVAV)
297 {
298 AV *av = newAV ();
299 av_push (av, rv);
300 rv = (SV *)av;
301 SvRV_set (self, rv);
302 }
303
304 av_push ((AV *)rv, data);
305}
306
307static SV *
308sv_struct (STRLEN size)
309{
310 SV *sv = newSV (size);
311 SvPOK_only (sv);
312 return sv;
313}
314
315static void *
316CLOBJ_push_struct (SV *self, STRLEN size)
317{
318 SV *sv = sv_struct (size);
319 CLOBJ_push (self, sv);
320 return SvPVX (sv);
321}
322
323#endif
324 279
325/*****************************************************************************/ 280/*****************************************************************************/
326/* callback stuff */ 281/* callback stuff */
327 282
328/* default context callback, log to stderr */ 283/* default context callback, log to stderr */
386 341
387 X_UNLOCK (eq_lock); 342 X_UNLOCK (eq_lock);
388 343
389 return res; 344 return res;
390} 345}
391
392#if 0
393static void
394mem_free (pTHX_ void *p)
395{
396 free (p);
397}
398//SAVEDESTRUCTOR_X (mem_free, item);
399#endif
400 346
401static void 347static void
402eq_poll (void) 348eq_poll (void)
403{ 349{
404 eq_item *item; 350 eq_item *item;
655 RETVAL = res; 601 RETVAL = res;
656 OUTPUT: 602 OUTPUT:
657 RETVAL 603 RETVAL
658 604
659const char * 605const char *
660err2str (cl_int err) 606err2str (cl_int err = res)
661 607
662const char * 608const char *
663enum2str (cl_uint value) 609enum2str (cl_uint value)
664 610
665void 611void
1050 XPUSH_CLOBJ ("OpenCL::BufferObj", mem); 996 XPUSH_CLOBJ ("OpenCL::BufferObj", mem);
1051 997
1052#if CL_VERSION_1_2 998#if CL_VERSION_1_2
1053 999
1054void 1000void
1055image (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, cl_mem_object_type type, size_t width, size_t height, size_t depth, size_t array_size = 0, size_t row_pitch = 0, size_t slice_pitch = 0, cl_uint num_mip_level = 0, cl_uint num_samples = 0, SV *data = &PL_sv_undef) 1001image (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, cl_mem_object_type type, size_t width, size_t height, size_t depth = 0, size_t array_size = 0, size_t row_pitch = 0, size_t slice_pitch = 0, cl_uint num_mip_level = 0, cl_uint num_samples = 0, SV *data = &PL_sv_undef)
1056 PPCODE: 1002 PPCODE:
1057 STRLEN len; 1003 STRLEN len;
1058 char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0; 1004 char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0;
1059 const cl_image_format format = { channel_order, channel_type }; 1005 const cl_image_format format = { channel_order, channel_type };
1060 const cl_image_desc desc = { 1006 const cl_image_desc desc = {
1253DESTROY (OpenCL::Queue self) 1199DESTROY (OpenCL::Queue self)
1254 CODE: 1200 CODE:
1255 clReleaseCommandQueue (self); 1201 clReleaseCommandQueue (self);
1256 1202
1257void 1203void
1258enqueue_read_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...) 1204read_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...)
1205 ALIAS:
1206 enqueue_read_buffer = 0
1259 PPCODE: 1207 PPCODE:
1260 cl_event ev = 0; 1208 cl_event ev = 0;
1261 EVENT_LIST (6, items - 6); 1209 EVENT_LIST (6, items - 6);
1262 1210
1263 SvUPGRADE (data, SVt_PV); 1211 SvUPGRADE (data, SVt_PV);
1268 1216
1269 if (ev) 1217 if (ev)
1270 XPUSH_CLOBJ ("OpenCL::Event", ev); 1218 XPUSH_CLOBJ ("OpenCL::Event", ev);
1271 1219
1272void 1220void
1273enqueue_write_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...) 1221write_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...)
1222 ALIAS:
1223 enqueue_write_buffer = 0
1274 PPCODE: 1224 PPCODE:
1275 cl_event ev = 0; 1225 cl_event ev = 0;
1276 STRLEN len; 1226 STRLEN len;
1277 char *ptr = SvPVbyte (data, len); 1227 char *ptr = SvPVbyte (data, len);
1278 EVENT_LIST (5, items - 5); 1228 EVENT_LIST (5, items - 5);
1283 XPUSH_CLOBJ ("OpenCL::Event", ev); 1233 XPUSH_CLOBJ ("OpenCL::Event", ev);
1284 1234
1285#if CL_VERSION_1_2 1235#if CL_VERSION_1_2
1286 1236
1287void 1237void
1288enqueue_fill_buffer (OpenCL::Queue self, OpenCL::Buffer mem, SV *data, size_t offset, size_t size, ...) 1238fill_buffer (OpenCL::Queue self, OpenCL::Buffer mem, SV *data, size_t offset, size_t size, ...)
1239 ALIAS:
1240 enqueue_fill_buffer = 0
1289 PPCODE: 1241 PPCODE:
1290 cl_event ev = 0; 1242 cl_event ev = 0;
1291 STRLEN len; 1243 STRLEN len;
1292 char *ptr = SvPVbyte (data, len); 1244 char *ptr = SvPVbyte (data, len);
1293 EVENT_LIST (5, items - 5); 1245 EVENT_LIST (5, items - 5);
1296 1248
1297 if (ev) 1249 if (ev)
1298 XPUSH_CLOBJ ("OpenCL::Event", ev); 1250 XPUSH_CLOBJ ("OpenCL::Event", ev);
1299 1251
1300void 1252void
1301enqueue_fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...) 1253fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...)
1254 ALIAS:
1255 enqueue_fill_image = 0
1302 PPCODE: 1256 PPCODE:
1303 cl_event ev = 0; 1257 cl_event ev = 0;
1304 STRLEN len; 1258 STRLEN len;
1305 const size_t origin [3] = { x, y, z }; 1259 const size_t origin [3] = { x, y, z };
1306 const size_t region [3] = { width, height, depth }; 1260 const size_t region [3] = { width, height, depth };
1315 NEED_SUCCESS (GetImageInfo, (img, CL_IMAGE_FORMAT, sizeof (format), &format, 0)); 1269 NEED_SUCCESS (GetImageInfo, (img, CL_IMAGE_FORMAT, sizeof (format), &format, 0));
1316 assert (sizeof (fus) == CL_FLOAT + 1 - CL_SNORM_INT8); 1270 assert (sizeof (fus) == CL_FLOAT + 1 - CL_SNORM_INT8);
1317 if (format.image_channel_data_type < CL_SNORM_INT8 || CL_FLOAT < format.image_channel_data_type) 1271 if (format.image_channel_data_type < CL_SNORM_INT8 || CL_FLOAT < format.image_channel_data_type)
1318 croak ("enqueue_fill_image: image has unsupported channel type, only opencl 1.2 channel types supported."); 1272 croak ("enqueue_fill_image: image has unsupported channel type, only opencl 1.2 channel types supported.");
1319 1273
1320 NEED_SUCCESS (EnqueueFillImage, (self, img, c_fus [fus [format.image_channel_data_type]], 1274 NEED_SUCCESS (EnqueueFillImage, (self, img, c_fus [fus [format.image_channel_data_type - CL_SNORM_INT8]],
1321 origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1275 origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1322 1276
1323 if (ev) 1277 if (ev)
1324 XPUSH_CLOBJ ("OpenCL::Event", ev); 1278 XPUSH_CLOBJ ("OpenCL::Event", ev);
1325 1279
1326#endif 1280#endif
1327 1281
1328void 1282void
1329enqueue_copy_buffer (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...) 1283copy_buffer (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...)
1284 ALIAS:
1285 enqueue_copy_buffer = 0
1330 PPCODE: 1286 PPCODE:
1331 cl_event ev = 0; 1287 cl_event ev = 0;
1332 EVENT_LIST (6, items - 6); 1288 EVENT_LIST (6, items - 6);
1333 1289
1334 NEED_SUCCESS (EnqueueCopyBuffer, (self, src, dst, src_offset, dst_offset, len, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1290 NEED_SUCCESS (EnqueueCopyBuffer, (self, src, dst, src_offset, dst_offset, len, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1335 1291
1336 if (ev) 1292 if (ev)
1337 XPUSH_CLOBJ ("OpenCL::Event", ev); 1293 XPUSH_CLOBJ ("OpenCL::Event", ev);
1338 1294
1339void 1295void
1340enqueue_read_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) 1296read_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...)
1297 ALIAS:
1298 enqueue_read_buffer_rect = 0
1341 PPCODE: 1299 PPCODE:
1342 cl_event ev = 0; 1300 cl_event ev = 0;
1343 const size_t buf_origin [3] = { buf_x , buf_y , buf_z }; 1301 const size_t buf_origin [3] = { buf_x , buf_y , buf_z };
1344 const size_t host_origin[3] = { host_x, host_y, host_z }; 1302 const size_t host_origin[3] = { host_x, host_y, host_z };
1345 const size_t region[3] = { width, height, depth }; 1303 const size_t region[3] = { width, height, depth };
1367 1325
1368 if (ev) 1326 if (ev)
1369 XPUSH_CLOBJ ("OpenCL::Event", ev); 1327 XPUSH_CLOBJ ("OpenCL::Event", ev);
1370 1328
1371void 1329void
1372enqueue_write_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) 1330write_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...)
1331 ALIAS:
1332 enqueue_write_buffer_rect = 0
1373 PPCODE: 1333 PPCODE:
1374 cl_event ev = 0; 1334 cl_event ev = 0;
1375 const size_t buf_origin [3] = { buf_x , buf_y , buf_z }; 1335 const size_t buf_origin [3] = { buf_x , buf_y , buf_z };
1376 const size_t host_origin[3] = { host_x, host_y, host_z }; 1336 const size_t host_origin[3] = { host_x, host_y, host_z };
1377 const size_t region[3] = { width, height, depth }; 1337 const size_t region[3] = { width, height, depth };
1400 1360
1401 if (ev) 1361 if (ev)
1402 XPUSH_CLOBJ ("OpenCL::Event", ev); 1362 XPUSH_CLOBJ ("OpenCL::Event", ev);
1403 1363
1404void 1364void
1405enqueue_copy_buffer_rect (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...) 1365copy_buffer_rect (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...)
1366 ALIAS:
1367 enqueue_copy_buffer_rect = 0
1406 PPCODE: 1368 PPCODE:
1407 cl_event ev = 0; 1369 cl_event ev = 0;
1408 const size_t src_origin[3] = { src_x, src_y, src_z }; 1370 const size_t src_origin[3] = { src_x, src_y, src_z };
1409 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1371 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1410 const size_t region[3] = { width, height, depth }; 1372 const size_t region[3] = { width, height, depth };
1414 1376
1415 if (ev) 1377 if (ev)
1416 XPUSH_CLOBJ ("OpenCL::Event", ev); 1378 XPUSH_CLOBJ ("OpenCL::Event", ev);
1417 1379
1418void 1380void
1419enqueue_read_image (OpenCL::Queue self, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) 1381read_image (OpenCL::Queue self, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...)
1382 ALIAS:
1383 enqueue_read_image = 0
1420 PPCODE: 1384 PPCODE:
1421 cl_event ev = 0; 1385 cl_event ev = 0;
1422 const size_t src_origin[3] = { src_x, src_y, src_z }; 1386 const size_t src_origin[3] = { src_x, src_y, src_z };
1423 const size_t region[3] = { width, height, depth }; 1387 const size_t region[3] = { width, height, depth };
1424 EVENT_LIST (12, items - 12); 1388 EVENT_LIST (12, items - 12);
1439 1403
1440 if (ev) 1404 if (ev)
1441 XPUSH_CLOBJ ("OpenCL::Event", ev); 1405 XPUSH_CLOBJ ("OpenCL::Event", ev);
1442 1406
1443void 1407void
1444enqueue_write_image (OpenCL::Queue self, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) 1408write_image (OpenCL::Queue self, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...)
1409 ALIAS:
1410 enqueue_write_image = 0
1445 PPCODE: 1411 PPCODE:
1446 cl_event ev = 0; 1412 cl_event ev = 0;
1447 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1413 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1448 const size_t region[3] = { width, height, depth }; 1414 const size_t region[3] = { width, height, depth };
1449 STRLEN len; 1415 STRLEN len;
1465 1431
1466 if (ev) 1432 if (ev)
1467 XPUSH_CLOBJ ("OpenCL::Event", ev); 1433 XPUSH_CLOBJ ("OpenCL::Event", ev);
1468 1434
1469void 1435void
1470enqueue_copy_image (OpenCL::Queue self, OpenCL::Image src, OpenCL::Image dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) 1436copy_image (OpenCL::Queue self, OpenCL::Image src, OpenCL::Image dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...)
1437 ALIAS:
1438 enqueue_copy_image = 0
1471 PPCODE: 1439 PPCODE:
1472 cl_event ev = 0; 1440 cl_event ev = 0;
1473 const size_t src_origin[3] = { src_x, src_y, src_z }; 1441 const size_t src_origin[3] = { src_x, src_y, src_z };
1474 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1442 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1475 const size_t region[3] = { width, height, depth }; 1443 const size_t region[3] = { width, height, depth };
1479 1447
1480 if (ev) 1448 if (ev)
1481 XPUSH_CLOBJ ("OpenCL::Event", ev); 1449 XPUSH_CLOBJ ("OpenCL::Event", ev);
1482 1450
1483void 1451void
1484enqueue_copy_image_to_buffer (OpenCL::Queue self, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...) 1452copy_image_to_buffer (OpenCL::Queue self, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...)
1453 ALIAS:
1454 enqueue_copy_image_to_buffer = 0
1485 PPCODE: 1455 PPCODE:
1486 cl_event ev = 0; 1456 cl_event ev = 0;
1487 const size_t src_origin[3] = { src_x, src_y, src_z }; 1457 const size_t src_origin[3] = { src_x, src_y, src_z };
1488 const size_t region[3] = { width, height, depth }; 1458 const size_t region[3] = { width, height, depth };
1489 EVENT_LIST (10, items - 10); 1459 EVENT_LIST (10, items - 10);
1492 1462
1493 if (ev) 1463 if (ev)
1494 XPUSH_CLOBJ ("OpenCL::Event", ev); 1464 XPUSH_CLOBJ ("OpenCL::Event", ev);
1495 1465
1496void 1466void
1497enqueue_copy_buffer_to_image (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) 1467copy_buffer_to_image (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...)
1468 ALIAS:
1469 enqueue_copy_buffer_to_image = 0
1498 PPCODE: 1470 PPCODE:
1499 cl_event ev = 0; 1471 cl_event ev = 0;
1500 const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; 1472 const size_t dst_origin[3] = { dst_x, dst_y, dst_z };
1501 const size_t region[3] = { width, height, depth }; 1473 const size_t region[3] = { width, height, depth };
1502 EVENT_LIST (10, items - 10); 1474 EVENT_LIST (10, items - 10);
1505 1477
1506 if (ev) 1478 if (ev)
1507 XPUSH_CLOBJ ("OpenCL::Event", ev); 1479 XPUSH_CLOBJ ("OpenCL::Event", ev);
1508 1480
1509void 1481void
1510enqueue_task (OpenCL::Queue self, OpenCL::Kernel kernel, ...) 1482task (OpenCL::Queue self, OpenCL::Kernel kernel, ...)
1483 ALIAS:
1484 enqueue_task = 0
1511 PPCODE: 1485 PPCODE:
1512 cl_event ev = 0; 1486 cl_event ev = 0;
1513 EVENT_LIST (2, items - 2); 1487 EVENT_LIST (2, items - 2);
1514 1488
1515 NEED_SUCCESS (EnqueueTask, (self, kernel, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1489 NEED_SUCCESS (EnqueueTask, (self, kernel, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1516 1490
1517 if (ev) 1491 if (ev)
1518 XPUSH_CLOBJ ("OpenCL::Event", ev); 1492 XPUSH_CLOBJ ("OpenCL::Event", ev);
1519 1493
1520void 1494void
1521enqueue_nd_range_kernel (OpenCL::Queue self, OpenCL::Kernel kernel, SV *global_work_offset, SV *global_work_size, SV *local_work_size = &PL_sv_undef, ...) 1495nd_range_kernel (OpenCL::Queue self, OpenCL::Kernel kernel, SV *global_work_offset, SV *global_work_size, SV *local_work_size = &PL_sv_undef, ...)
1496 ALIAS:
1497 enqueue_nd_range_kernel = 0
1522 PPCODE: 1498 PPCODE:
1523 cl_event ev = 0; 1499 cl_event ev = 0;
1524 size_t *gwo = 0, *gws, *lws = 0; 1500 size_t *gwo = 0, *gws, *lws = 0;
1525 int gws_len; 1501 int gws_len;
1526 size_t *lists; 1502 size_t *lists;
1534 1510
1535 lists = tmpbuf (sizeof (size_t) * 3 * gws_len); 1511 lists = tmpbuf (sizeof (size_t) * 3 * gws_len);
1536 1512
1537 gws = lists + gws_len * 0; 1513 gws = lists + gws_len * 0;
1538 for (i = 0; i < gws_len; ++i) 1514 for (i = 0; i < gws_len; ++i)
1515 {
1539 gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); 1516 gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]);
1517 // at least nvidia crashes for 0-sized work group sizes, work around
1518 if (!gws [i])
1519 croak ("clEnqueueNDRangeKernel: global_work_size[%d] is zero, must be non-zero", i);
1520 }
1540 1521
1541 if (SvOK (global_work_offset)) 1522 if (SvOK (global_work_offset))
1542 { 1523 {
1543 if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) 1524 if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV)
1544 croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); 1525 croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference");
1552 } 1533 }
1553 1534
1554 if (SvOK (local_work_size)) 1535 if (SvOK (local_work_size))
1555 { 1536 {
1556 if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) 1537 if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV)
1557 croak ("clEnqueueNDRangeKernel: global_work_size must be undef or an array reference"); 1538 croak ("clEnqueueNDRangeKernel: local_work_size must be undef or an array reference");
1558 1539
1559 if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) 1540 if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len)
1560 croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); 1541 croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size");
1561 1542
1562 lws = lists + gws_len * 2; 1543 lws = lists + gws_len * 2;
1563 for (i = 0; i < gws_len; ++i) 1544 for (i = 0; i < gws_len; ++i)
1545 {
1564 lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); 1546 lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]);
1547 // at least nvidia crashes for 0-sized work group sizes, work around
1548 if (!lws [i])
1549 croak ("clEnqueueNDRangeKernel: local_work_size[%d] is zero, must be non-zero", i);
1550 }
1565 } 1551 }
1566 1552
1567 NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); 1553 NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0));
1568 1554
1569 if (ev) 1555 if (ev)
1570 XPUSH_CLOBJ ("OpenCL::Event", ev); 1556 XPUSH_CLOBJ ("OpenCL::Event", ev);
1571 1557
1572#if cl_apple_gl_sharing || cl_khr_gl_sharing 1558#if cl_apple_gl_sharing || cl_khr_gl_sharing
1573 1559
1574void 1560void
1575enqueue_acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) 1561acquire_gl_objects (OpenCL::Queue self, SV *objects, ...)
1576 ALIAS: 1562 ALIAS:
1563 release_gl_objects = 1
1564 enqueue_acquire_gl_objects = 0
1577 enqueue_release_gl_objects = 1 1565 enqueue_release_gl_objects = 1
1578 PPCODE: 1566 PPCODE:
1579 if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV) 1567 if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV)
1580 croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call"); 1568 croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call");
1581 1569
1598 XPUSH_CLOBJ ("OpenCL::Event", ev); 1586 XPUSH_CLOBJ ("OpenCL::Event", ev);
1599 1587
1600#endif 1588#endif
1601 1589
1602void 1590void
1603enqueue_wait_for_events (OpenCL::Queue self, ...) 1591wait_for_events (OpenCL::Queue self, ...)
1592 ALIAS:
1593 enqueue_wait_for_events = 0
1604 CODE: 1594 CODE:
1605 EVENT_LIST (1, items - 1); 1595 EVENT_LIST (1, items - 1);
1606#if PREFER_1_1 1596#if PREFER_1_1
1607 NEED_SUCCESS (EnqueueWaitForEvents, (self, event_list_count, event_list_ptr)); 1597 NEED_SUCCESS (EnqueueWaitForEvents, (self, event_list_count, event_list_ptr));
1608#else 1598#else
1609 NEED_SUCCESS (EnqueueBarrierWithWaitList, (self, event_list_count, event_list_ptr, 0)); 1599 NEED_SUCCESS (EnqueueBarrierWithWaitList, (self, event_list_count, event_list_ptr, 0));
1610#endif 1600#endif
1611 1601
1612void 1602void
1613enqueue_marker (OpenCL::Queue self, ...) 1603marker (OpenCL::Queue self, ...)
1604 ALIAS:
1605 enqueue_marker = 0
1614 PPCODE: 1606 PPCODE:
1615 cl_event ev = 0; 1607 cl_event ev = 0;
1616 EVENT_LIST (1, items - 1); 1608 EVENT_LIST (1, items - 1);
1617#if PREFER_1_1 1609#if PREFER_1_1
1618 if (!event_list_count) 1610 if (!event_list_count)
1631#endif 1623#endif
1632 if (ev) 1624 if (ev)
1633 XPUSH_CLOBJ ("OpenCL::Event", ev); 1625 XPUSH_CLOBJ ("OpenCL::Event", ev);
1634 1626
1635void 1627void
1636enqueue_barrier (OpenCL::Queue self, ...) 1628barrier (OpenCL::Queue self, ...)
1629 ALIAS:
1630 enqueue_barrier = 0
1637 PPCODE: 1631 PPCODE:
1638 cl_event ev = 0; 1632 cl_event ev = 0;
1639 EVENT_LIST (1, items - 1); 1633 EVENT_LIST (1, items - 1);
1640#if PREFER_1_1 1634#if PREFER_1_1
1641 if (!event_list_count && GIMME_V == G_VOID) 1635 if (!event_list_count && GIMME_V == G_VOID)
2170DESTROY (OpenCL::Kernel self) 2164DESTROY (OpenCL::Kernel self)
2171 CODE: 2165 CODE:
2172 clReleaseKernel (self); 2166 clReleaseKernel (self);
2173 2167
2174void 2168void
2169setf (OpenCL::Kernel self, const char *format, ...)
2170 CODE:
2171 int i;
2172 for (i = 2; ; ++i)
2173 {
2174 while (*format == ' ')
2175 ++format;
2176
2177 char type = *format++;
2178
2179 if (!type)
2180 break;
2181
2182 if (i >= items)
2183 croak ("OpenCL::Kernel::setf format string too long (not enough arguments)");
2184
2185 SV *sv = ST (i);
2186
2187 union
2188 {
2189 cl_char cc; cl_uchar cC; cl_short cs; cl_ushort cS;
2190 cl_int ci; cl_uint cI; cl_long cl; cl_ulong cL;
2191 cl_half ch; cl_float cf; cl_double cd;
2192 cl_mem cm;
2193 cl_sampler ca;
2194 size_t cz;
2195 cl_event ce;
2196 } arg;
2197 size_t size;
2198 int nullarg = 0;
2199
2200 switch (type)
2201 {
2202 case 'c': arg.cc = SvIV (sv); size = sizeof (arg.cc); break;
2203 case 'C': arg.cC = SvUV (sv); size = sizeof (arg.cC); break;
2204 case 's': arg.cs = SvIV (sv); size = sizeof (arg.cs); break;
2205 case 'S': arg.cS = SvUV (sv); size = sizeof (arg.cS); break;
2206 case 'i': arg.ci = SvIV (sv); size = sizeof (arg.ci); break;
2207 case 'I': arg.cI = SvUV (sv); size = sizeof (arg.cI); break;
2208 case 'l': arg.cl = SvIV (sv); size = sizeof (arg.cl); break;
2209 case 'L': arg.cL = SvUV (sv); size = sizeof (arg.cL); break;
2210
2211 case 'h': arg.ch = SvUV (sv); size = sizeof (arg.ch); break;
2212 case 'f': arg.cf = SvNV (sv); size = sizeof (arg.cf); break;
2213 case 'd': arg.cd = SvNV (sv); size = sizeof (arg.cd); break;
2214
2215 case 'z': nullarg = 1; size = SvIV (sv); break;
2216
2217 case 'm': nullarg = !SvOK (sv); arg.cm = SvCLOBJ ("OpenCL::Kernel::setf", "m", sv, "OpenCL::Memory" ); size = sizeof (arg.cm); break;
2218 case 'a': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "a", sv, "OpenCL::Sampler"); size = sizeof (arg.ca); break;
2219 case 'e': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "e", sv, "OpenCL::Event" ); size = sizeof (arg.ce); break;
2220
2221 default:
2222 croak ("OpenCL::Kernel::setf format character '%c' not supported", type);
2223 }
2224
2225 res = clSetKernelArg (self, i - 2, size, nullarg ? 0 : &arg);
2226 if (res)
2227 croak ("OpenCL::Kernel::setf kernel parameter '%c' (#%d): %s", type, i - 2, err2str (res));
2228 }
2229
2230 if (i != items)
2231 croak ("OpenCL::Kernel::setf format string too short (too many arguments)");
2232
2233void
2175set_char (OpenCL::Kernel self, cl_uint idx, cl_char value) 2234set_char (OpenCL::Kernel self, cl_uint idx, cl_char value)
2176 CODE: 2235 CODE:
2177 clSetKernelArg (self, idx, sizeof (value), &value); 2236 clSetKernelArg (self, idx, sizeof (value), &value);
2178 2237
2179void 2238void
2227 clSetKernelArg (self, idx, sizeof (value), &value); 2286 clSetKernelArg (self, idx, sizeof (value), &value);
2228 2287
2229void 2288void
2230set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value) 2289set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value)
2231 CODE: 2290 CODE:
2232 clSetKernelArg (self, idx, sizeof (value), &value); 2291 clSetKernelArg (self, idx, sizeof (value), value ? &value : 0);
2233 2292
2234void 2293void
2235set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value) 2294set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value)
2236 CODE: 2295 CODE:
2237 clSetKernelArg (self, idx, sizeof (value), &value); 2296 clSetKernelArg (self, idx, sizeof (value), value ? &value : 0);
2238 2297
2239void 2298void
2240set_image2d (OpenCL::Kernel self, cl_uint idx, OpenCL::Image2D_ornull value) 2299set_image (OpenCL::Kernel self, cl_uint idx, OpenCL::Image_ornull value)
2300 ALIAS:
2301 set_image2d = 0
2302 set_image3d = 0
2241 CODE: 2303 CODE:
2242 clSetKernelArg (self, idx, sizeof (value), &value); 2304 clSetKernelArg (self, idx, sizeof (value), value ? &value : 0);
2243
2244void
2245set_image3d (OpenCL::Kernel self, cl_uint idx, OpenCL::Image3D_ornull value)
2246 CODE:
2247 clSetKernelArg (self, idx, sizeof (value), &value);
2248 2305
2249void 2306void
2250set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value) 2307set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value)
2251 CODE: 2308 CODE:
2252 clSetKernelArg (self, idx, sizeof (value), &value); 2309 clSetKernelArg (self, idx, sizeof (value), &value);

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines