… | |
… | |
601 | RETVAL = res; |
601 | RETVAL = res; |
602 | OUTPUT: |
602 | OUTPUT: |
603 | RETVAL |
603 | RETVAL |
604 | |
604 | |
605 | const char * |
605 | const char * |
606 | err2str (cl_int err) |
606 | err2str (cl_int err = res) |
607 | |
607 | |
608 | const char * |
608 | const char * |
609 | enum2str (cl_uint value) |
609 | enum2str (cl_uint value) |
610 | |
610 | |
611 | void |
611 | void |
… | |
… | |
1510 | |
1510 | |
1511 | lists = tmpbuf (sizeof (size_t) * 3 * gws_len); |
1511 | lists = tmpbuf (sizeof (size_t) * 3 * gws_len); |
1512 | |
1512 | |
1513 | gws = lists + gws_len * 0; |
1513 | gws = lists + gws_len * 0; |
1514 | for (i = 0; i < gws_len; ++i) |
1514 | for (i = 0; i < gws_len; ++i) |
|
|
1515 | { |
1515 | gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); |
1516 | gws [i] = SvIV (AvARRAY (SvRV (global_work_size))[i]); |
|
|
1517 | // at least nvidia crashes for 0-sized work group sizes, work around |
|
|
1518 | if (!gws [i]) |
|
|
1519 | croak ("clEnqueueNDRangeKernel: global_work_size[%d] is zero, must be non-zero", i); |
|
|
1520 | } |
1516 | |
1521 | |
1517 | if (SvOK (global_work_offset)) |
1522 | if (SvOK (global_work_offset)) |
1518 | { |
1523 | { |
1519 | if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) |
1524 | if (!SvROK (global_work_offset) || SvTYPE (SvRV (global_work_offset)) != SVt_PVAV) |
1520 | croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); |
1525 | croak ("clEnqueueNDRangeKernel: global_work_offset must be undef or an array reference"); |
… | |
… | |
1528 | } |
1533 | } |
1529 | |
1534 | |
1530 | if (SvOK (local_work_size)) |
1535 | if (SvOK (local_work_size)) |
1531 | { |
1536 | { |
1532 | if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) |
1537 | if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) |
1533 | croak ("clEnqueueNDRangeKernel: global_work_size must be undef or an array reference"); |
1538 | croak ("clEnqueueNDRangeKernel: local_work_size must be undef or an array reference"); |
1534 | |
1539 | |
1535 | if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) |
1540 | if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) |
1536 | croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); |
1541 | croak ("clEnqueueNDRangeKernel: local_work_local must be undef or an array of same size as global_work_size"); |
1537 | |
1542 | |
1538 | lws = lists + gws_len * 2; |
1543 | lws = lists + gws_len * 2; |
1539 | for (i = 0; i < gws_len; ++i) |
1544 | for (i = 0; i < gws_len; ++i) |
|
|
1545 | { |
1540 | lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); |
1546 | lws [i] = SvIV (AvARRAY (SvRV (local_work_size))[i]); |
|
|
1547 | // at least nvidia crashes for 0-sized work group sizes, work around |
|
|
1548 | if (!lws [i]) |
|
|
1549 | croak ("clEnqueueNDRangeKernel: local_work_size[%d] is zero, must be non-zero", i); |
|
|
1550 | } |
1541 | } |
1551 | } |
1542 | |
1552 | |
1543 | NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); |
1553 | NEED_SUCCESS (EnqueueNDRangeKernel, (self, kernel, gws_len, gwo, gws, lws, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); |
1544 | |
1554 | |
1545 | if (ev) |
1555 | if (ev) |
… | |
… | |
1548 | #if cl_apple_gl_sharing || cl_khr_gl_sharing |
1558 | #if cl_apple_gl_sharing || cl_khr_gl_sharing |
1549 | |
1559 | |
1550 | void |
1560 | void |
1551 | acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) |
1561 | acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) |
1552 | ALIAS: |
1562 | ALIAS: |
|
|
1563 | release_gl_objects = 1 |
1553 | enqueue_acquire_gl_objects = 0 |
1564 | enqueue_acquire_gl_objects = 0 |
1554 | ALIAS: |
|
|
1555 | enqueue_release_gl_objects = 1 |
1565 | enqueue_release_gl_objects = 1 |
1556 | PPCODE: |
1566 | PPCODE: |
1557 | if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV) |
1567 | if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV) |
1558 | croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call"); |
1568 | croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call"); |
1559 | |
1569 | |
… | |
… | |
2154 | DESTROY (OpenCL::Kernel self) |
2164 | DESTROY (OpenCL::Kernel self) |
2155 | CODE: |
2165 | CODE: |
2156 | clReleaseKernel (self); |
2166 | clReleaseKernel (self); |
2157 | |
2167 | |
2158 | void |
2168 | void |
|
|
2169 | setf (OpenCL::Kernel self, const char *format, ...) |
|
|
2170 | CODE: |
|
|
2171 | int i; |
|
|
2172 | for (i = 2; ; ++i) |
|
|
2173 | { |
|
|
2174 | while (*format == ' ') |
|
|
2175 | ++format; |
|
|
2176 | |
|
|
2177 | char type = *format++; |
|
|
2178 | |
|
|
2179 | if (!type) |
|
|
2180 | break; |
|
|
2181 | |
|
|
2182 | if (i >= items) |
|
|
2183 | croak ("OpenCL::Kernel::setf format string too long (not enough arguments)"); |
|
|
2184 | |
|
|
2185 | SV *sv = ST (i); |
|
|
2186 | |
|
|
2187 | union |
|
|
2188 | { |
|
|
2189 | cl_char cc; cl_uchar cC; cl_short cs; cl_ushort cS; |
|
|
2190 | cl_int ci; cl_uint cI; cl_long cl; cl_ulong cL; |
|
|
2191 | cl_half ch; cl_float cf; cl_double cd; |
|
|
2192 | cl_mem cm; |
|
|
2193 | cl_sampler ca; |
|
|
2194 | size_t cz; |
|
|
2195 | cl_event ce; |
|
|
2196 | } arg; |
|
|
2197 | size_t size; |
|
|
2198 | int nullarg = 0; |
|
|
2199 | |
|
|
2200 | switch (type) |
|
|
2201 | { |
|
|
2202 | case 'c': arg.cc = SvIV (sv); size = sizeof (arg.cc); break; |
|
|
2203 | case 'C': arg.cC = SvUV (sv); size = sizeof (arg.cC); break; |
|
|
2204 | case 's': arg.cs = SvIV (sv); size = sizeof (arg.cs); break; |
|
|
2205 | case 'S': arg.cS = SvUV (sv); size = sizeof (arg.cS); break; |
|
|
2206 | case 'i': arg.ci = SvIV (sv); size = sizeof (arg.ci); break; |
|
|
2207 | case 'I': arg.cI = SvUV (sv); size = sizeof (arg.cI); break; |
|
|
2208 | case 'l': arg.cl = SvIV (sv); size = sizeof (arg.cl); break; |
|
|
2209 | case 'L': arg.cL = SvUV (sv); size = sizeof (arg.cL); break; |
|
|
2210 | |
|
|
2211 | case 'h': arg.ch = SvUV (sv); size = sizeof (arg.ch); break; |
|
|
2212 | case 'f': arg.cf = SvNV (sv); size = sizeof (arg.cf); break; |
|
|
2213 | case 'd': arg.cd = SvNV (sv); size = sizeof (arg.cd); break; |
|
|
2214 | |
|
|
2215 | case 'z': nullarg = 1; size = SvIV (sv); break; |
|
|
2216 | |
|
|
2217 | case 'm': nullarg = !SvOK (sv); arg.cm = SvCLOBJ ("OpenCL::Kernel::setf", "m", sv, "OpenCL::Memory" ); size = sizeof (arg.cm); break; |
|
|
2218 | case 'a': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "a", sv, "OpenCL::Sampler"); size = sizeof (arg.ca); break; |
|
|
2219 | case 'e': nullarg = !SvOK (sv); arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "e", sv, "OpenCL::Event" ); size = sizeof (arg.ce); break; |
|
|
2220 | |
|
|
2221 | default: |
|
|
2222 | croak ("OpenCL::Kernel::setf format character '%c' not supported", type); |
|
|
2223 | } |
|
|
2224 | |
|
|
2225 | res = clSetKernelArg (self, i - 2, size, nullarg ? 0 : &arg); |
|
|
2226 | if (res) |
|
|
2227 | croak ("OpenCL::Kernel::setf kernel parameter '%c' (#%d): %s", type, i - 2, err2str (res)); |
|
|
2228 | } |
|
|
2229 | |
|
|
2230 | if (i != items) |
|
|
2231 | croak ("OpenCL::Kernel::setf format string too short (too many arguments)"); |
|
|
2232 | |
|
|
2233 | void |
2159 | set_char (OpenCL::Kernel self, cl_uint idx, cl_char value) |
2234 | set_char (OpenCL::Kernel self, cl_uint idx, cl_char value) |
2160 | CODE: |
2235 | CODE: |
2161 | clSetKernelArg (self, idx, sizeof (value), &value); |
2236 | clSetKernelArg (self, idx, sizeof (value), &value); |
2162 | |
2237 | |
2163 | void |
2238 | void |
… | |
… | |
2211 | clSetKernelArg (self, idx, sizeof (value), &value); |
2286 | clSetKernelArg (self, idx, sizeof (value), &value); |
2212 | |
2287 | |
2213 | void |
2288 | void |
2214 | set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value) |
2289 | set_memory (OpenCL::Kernel self, cl_uint idx, OpenCL::Memory_ornull value) |
2215 | CODE: |
2290 | CODE: |
2216 | clSetKernelArg (self, idx, sizeof (value), &value); |
2291 | clSetKernelArg (self, idx, sizeof (value), value ? &value : 0); |
2217 | |
2292 | |
2218 | void |
2293 | void |
2219 | set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value) |
2294 | set_buffer (OpenCL::Kernel self, cl_uint idx, OpenCL::Buffer_ornull value) |
2220 | CODE: |
2295 | CODE: |
2221 | clSetKernelArg (self, idx, sizeof (value), &value); |
2296 | clSetKernelArg (self, idx, sizeof (value), value ? &value : 0); |
2222 | |
2297 | |
2223 | void |
2298 | void |
2224 | set_image (OpenCL::Kernel self, cl_uint idx, OpenCL::Image_ornull value) |
2299 | set_image (OpenCL::Kernel self, cl_uint idx, OpenCL::Image_ornull value) |
2225 | ALIAS: |
2300 | ALIAS: |
2226 | set_image2d = 0 |
2301 | set_image2d = 0 |
2227 | set_image3d = 0 |
2302 | set_image3d = 0 |
2228 | CODE: |
2303 | CODE: |
2229 | clSetKernelArg (self, idx, sizeof (value), &value); |
2304 | clSetKernelArg (self, idx, sizeof (value), value ? &value : 0); |
2230 | |
2305 | |
2231 | void |
2306 | void |
2232 | set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value) |
2307 | set_sampler (OpenCL::Kernel self, cl_uint idx, OpenCL::Sampler value) |
2233 | CODE: |
2308 | CODE: |
2234 | clSetKernelArg (self, idx, sizeof (value), &value); |
2309 | clSetKernelArg (self, idx, sizeof (value), &value); |