… | |
… | |
1510 | |
1510 | |
1511 | lists = tmpbuf (sizeof (size_t) * 3 * gws_len); |
1511 | lists = tmpbuf (sizeof (size_t) * 3 * gws_len); |
1512 | |
1512 | |
1513 | gws = lists + gws_len * 0; |
1513 | gws = lists + gws_len * 0; |
1514 | for (i = 0; i < gws_len; ++i) |
1514 | for (i = 0; i < gws_len; ++i) |
|
|
1515 | { |
1515 | gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); |
1516 | gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); |
|
|
1517 | // at least nvidia crashes for 0-sized work group sizes, work around |
|
|
1518 | if (!gws [i]) |
|
|
1519 | croak ("clEnqueueNDRangeKernel: global_work_size[%d] is zero, must be non-zero", i); |
|
|
1520 | } |
1516 | |
1521 | |
1517 | if (SvOK (global_work_offset)) |
1522 | if (SvOK (global_work_offset)) |
1518 | { |
1523 | { |
1519 | if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) |
1524 | if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) |
1520 | croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); |
1525 | croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); |
… | |
… | |
1528 | } |
1533 | } |
1529 | |
1534 | |
1530 | if (SvOK (local_work_size)) |
1535 | if (SvOK (local_work_size)) |
1531 | { |
1536 | { |
1532 | if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) |
1537 | if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) |
1533 | croak ("clEnqueueNDRangeKernel: global_work_size must be undef or an array reference"); |
1538 | croak ("clEnqueueNDRangeKernel: local_work_size must be undef or an array reference"); |
1534 | |
1539 | |
1535 | if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) |
1540 | if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) |
1536 | croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); |
1541 | croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); |
1537 | |
1542 | |
1538 | lws = lists + gws_len * 2; |
1543 | lws = lists + gws_len * 2; |
1539 | for (i = 0; i < gws_len; ++i) |
1544 | for (i = 0; i < gws_len; ++i) |
|
|
1545 | { |
1540 | lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); |
1546 | lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); |
|
|
1547 | // at least nvidia crashes for 0-sized work group sizes, work around |
|
|
1548 | if (!lws [i]) |
|
|
1549 | croak ("clEnqueueNDRangeKernel: local_work_size[%d] is zero, must be non-zero", i); |
|
|
1550 | } |
1541 | } |
1551 | } |
1542 | |
1552 | |
1543 | NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); |
1553 | NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); |
1544 | |
1554 | |
1545 | if (ev) |
1555 | if (ev) |
… | |
… | |
2183 | cl_sampler ca; |
2193 | cl_sampler ca; |
2184 | size_t cz; |
2194 | size_t cz; |
2185 | cl_event ce; |
2195 | cl_event ce; |
2186 | } arg; |
2196 | } arg; |
2187 | size_t size; |
2197 | size_t size; |
|
|
2198 | int nullarg = 0; |
2188 | |
2199 | |
2189 | switch (type) |
2200 | switch (type) |
2190 | { |
2201 | { |
2191 | case 'c': arg.cc = SvIV (sv); size = sizeof (arg.cc); break; |
2202 | case 'c': arg.cc = SvIV (sv); size = sizeof (arg.cc); break; |
2192 | case 'C': arg.cC = SvUV (sv); size = sizeof (arg.cC); break; |
2203 | case 'C': arg.cC = SvUV (sv); size = sizeof (arg.cC); break; |
… | |
… | |
2198 | case 'L': arg.cL = SvUV (sv); size = sizeof (arg.cL); break; |
2209 | case 'L': arg.cL = SvUV (sv); size = sizeof (arg.cL); break; |
2199 | |
2210 | |
2200 | case 'h': arg.ch = SvUV (sv); size = sizeof (arg.ch); break; |
2211 | case 'h': arg.ch = SvUV (sv); size = sizeof (arg.ch); break; |
2201 | case 'f': arg.cf = SvNV (sv); size = sizeof (arg.cf); break; |
2212 | case 'f': arg.cf = SvNV (sv); size = sizeof (arg.cf); break; |
2202 | case 'd': arg.cd = SvNV (sv); size = sizeof (arg.cd); break; |
2213 | case 'd': arg.cd = SvNV (sv); size = sizeof (arg.cd); break; |
2203 | case 'z': arg.cz = SvUV (sv); size = sizeof (arg.cz); break; |
|
|
2204 | |
2214 | |
|
|
2215 | case 'z': nullarg = 1; size = SvIV (sv); break; |
|
|
2216 | |
2205 | case 'm': arg.cm = SvCLOBJ ("OpenCL::Kernel::setf", "m", sv, "OpenCL::Memory" ); size = sizeof (arg.cm); break; |
2217 | case 'm': nullarg = !SvOK (sv); arg.cm = SvCLOBJ ("OpenCL::Kernel::setf", "m", sv, "OpenCL::Memory" ); size = sizeof (arg.cm); break; |
2206 | case 'a': arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "a", sv, "OpenCL::Sampler"); size = sizeof (arg.ca); break; |
2218 | case 'a': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "a", sv, "OpenCL::Sampler"); size = sizeof (arg.ca); break; |
2207 | case 'e': arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "e", sv, "OpenCL::Event" ); size = sizeof (arg.ce); break; |
2219 | case 'e': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "e", sv, "OpenCL::Event" ); size = sizeof (arg.ce); break; |
2208 | |
2220 | |
2209 | default: |
2221 | default: |
2210 | croak ("OpenCL::Kernel::setf format character '%c' not supported", type); |
2222 | croak ("OpenCL::Kernel::setf format character '%c' not supported", type); |
2211 | } |
2223 | } |
2212 | |
2224 | |
2213 | clSetKernelArg (self, i - 2, size, &arg); |
2225 | res = clSetKernelArg (self, i - 2, size, nullarg ? 0 : &arg); |
|
|
2226 | if (res) |
|
|
2227 | croak ("OpenCL::Kernel::setf kernel parameter '%c' (#%d): %s", type, i - 2, err2str (res)); |
2214 | } |
2228 | } |
2215 | |
2229 | |
2216 | if (i != items) |
2230 | if (i != items) |
2217 | croak ("OpenCL::Kernel::setf format string too short (too many arguments)"); |
2231 | croak ("OpenCL::Kernel::setf format string too short (too many arguments)"); |
2218 | |
2232 | |
… | |
… | |
2272 | clSetKernelArg (self, idx, sizeof (value), &value); |
2286 | clSetKernelArg (self, idx, sizeof (value), &value); |
2273 | |
2287 | |
2274 | void |
2288 | void |
2275 | set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value) |
2289 | set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value) |
2276 | CODE: |
2290 | CODE: |
2277 | clSetKernelArg (self, idx, sizeof (value), &value); |
2291 | clSetKernelArg (self, idx, sizeof (value), value ? &value : 0); |
2278 | |
2292 | |
2279 | void |
2293 | void |
2280 | set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value) |
2294 | set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value) |
2281 | CODE: |
2295 | CODE: |
2282 | clSetKernelArg (self, idx, sizeof (value), &value); |
2296 | clSetKernelArg (self, idx, sizeof (value), value ? &value : 0); |
2283 | |
2297 | |
2284 | void |
2298 | void |
2285 | set_image (OpenCL::Kernel self, cl_uint idx, OpenCL::Image_ornull value) |
2299 | set_image (OpenCL::Kernel self, cl_uint idx, OpenCL::Image_ornull value) |
2286 | ALIAS: |
2300 | ALIAS: |
2287 | set_image2d = 0 |
2301 | set_image2d = 0 |
2288 | set_image3d = 0 |
2302 | set_image3d = 0 |
2289 | CODE: |
2303 | CODE: |
2290 | clSetKernelArg (self, idx, sizeof (value), &value); |
2304 | clSetKernelArg (self, idx, sizeof (value), value ? &value : 0); |
2291 | |
2305 | |
2292 | void |
2306 | void |
2293 | set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value) |
2307 | set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value) |
2294 | CODE: |
2308 | CODE: |
2295 | clSetKernelArg (self, idx, sizeof (value), &value); |
2309 | clSetKernelArg (self, idx, sizeof (value), &value); |