… | |
… | |
1510 | |
1510 | |
1511 | lists = tmpbuf (sizeof (size_t) * 3 * gws_len); |
1511 | lists = tmpbuf (sizeof (size_t) * 3 * gws_len); |
1512 | |
1512 | |
1513 | gws = lists + gws_len * 0; |
1513 | gws = lists + gws_len * 0; |
1514 | for (i = 0; i < gws_len; ++i) |
1514 | for (i = 0; i < gws_len; ++i) |
|
|
1515 | { |
1515 | gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); |
1516 | gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); |
|
|
1517 | // at least nvidia crashes for 0-sized work group sizes, work around |
|
|
1518 | if (!gws [i]) |
|
|
1519 | croak ("clEnqueueNDRangeKernel: global_work_size[%d] is zero, must be non-zero", i); |
|
|
1520 | } |
1516 | |
1521 | |
1517 | if (SvOK (global_work_offset)) |
1522 | if (SvOK (global_work_offset)) |
1518 | { |
1523 | { |
1519 | if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) |
1524 | if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) |
1520 | croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); |
1525 | croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); |
… | |
… | |
1528 | } |
1533 | } |
1529 | |
1534 | |
1530 | if (SvOK (local_work_size)) |
1535 | if (SvOK (local_work_size)) |
1531 | { |
1536 | { |
1532 | if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) |
1537 | if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) |
1533 | croak ("clEnqueueNDRangeKernel: global_work_size must be undef or an array reference"); |
1538 | croak ("clEnqueueNDRangeKernel: local_work_size must be undef or an array reference"); |
1534 | |
1539 | |
1535 | if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) |
1540 | if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) |
1536 | croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); |
1541 | croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); |
1537 | |
1542 | |
1538 | lws = lists + gws_len * 2; |
1543 | lws = lists + gws_len * 2; |
1539 | for (i = 0; i < gws_len; ++i) |
1544 | for (i = 0; i < gws_len; ++i) |
|
|
1545 | { |
1540 | lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); |
1546 | lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); |
|
|
1547 | // at least nvidia crashes for 0-sized work group sizes, work around |
|
|
1548 | if (!lws [i]) |
|
|
1549 | croak ("clEnqueueNDRangeKernel: local_work_size[%d] is zero, must be non-zero", i); |
|
|
1550 | } |
1541 | } |
1551 | } |
1542 | |
1552 | |
1543 | NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); |
1553 | NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); |
1544 | |
1554 | |
1545 | if (ev) |
1555 | if (ev) |