--- OpenCL/OpenCL.xs 2012/04/16 09:43:30 1.23 +++ OpenCL/OpenCL.xs 2012/04/24 14:24:42 1.48 @@ -2,12 +2,38 @@ #include "perl.h" #include "XSUB.h" +#ifdef I_DLFCN + #include +#endif + +// how stupid is that, the 1.2 header files define CL_VERSION_1_1, +// but then fail to define the api functions unless you ALSO define +// this. This breaks 100% of the opencl 1.1 apps, for what reason? +// after all, the functions are deprecated, not removed. +// in addition, you cannot test for this in any future-proof way. +// each time a new opencl version comes out, you need to make a new +// release. +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS /* just guessing, you stupid idiots */ + +#ifndef PREFER_1_1 + #define PREFER_1_1 1 +#endif + +#if PREFER_1_1 + #define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif + #ifdef __APPLE__ #include #else #include #endif +#ifndef CL_VERSION_1_2 + #undef PREFER_1_1 + #define PREFER_1_1 1 +#endif + typedef cl_platform_id OpenCL__Platform; typedef cl_device_id OpenCL__Device; typedef cl_context OpenCL__Context; @@ -33,15 +59,61 @@ /*****************************************************************************/ +// name must include a leading underscore +// all of this horrors would be unneceesary if somebody wrote a proper OpenGL module +// for perl. doh. +static void * +glsym (const char *name) +{ + void *fun = 0; + + #if defined I_DLFCN && defined RTLD_DEFAULT + fun = dlsym (RTLD_DEFAULT, name + 1); + if (!fun) fun = dlsym (RTLD_DEFAULT, name); + + if (!fun) + { + static void *libgl; + static const char *glso[] = { + "libGL.so.1", + "libGL.so.3", + "libGL.so.4.0", + "libGL.so", + "/usr/lib/libGL.so", + "/usr/X11R6/lib/libGL.1.dylib" + }; + int i; + + for (i = 0; !libgl && i < sizeof (glso) / sizeof (glso [0]); ++i) + { + libgl = dlopen (glso [i], RTLD_LAZY); + if (libgl) + break; + } + + if (libgl) + { + fun = dlsym (libgl, name + 1); + if (!fun) fun = dlsym (libgl, name); + } + } + #endif + + return fun; +} + +/*****************************************************************************/ + /* up to two temporary buffers */ static void * tmpbuf (size_t size) { + enum { buffers = 3 }; static int idx; - static void *buf [2]; - static size_t len [2]; + static void *buf [buffers]; + static size_t len [buffers]; - idx ^= 1; + idx = (idx + 1) % buffers; if (len [idx] < size) { @@ -133,6 +205,73 @@ /*****************************************************************************/ +static cl_context_properties * +SvCONTEXTPROPERTIES (const char *func, const char *svname, SV *sv, cl_context_properties *extra, int extracount) +{ + if (!sv || !SvOK (sv)) + if (extra) + sv = sv_2mortal (newRV_noinc ((SV *)newAV ())); // slow, but rarely used hopefully + else + return 0; + + if (SvROK (sv) && SvTYPE (SvRV (sv)) == SVt_PVAV) + { + AV *av = (AV *)SvRV (sv); + int i, len = av_len (av) + 1; + cl_context_properties *p = tmpbuf (sizeof (cl_context_properties) * (len + extracount + 1)); + cl_context_properties *l = p; + + if (len & 1) + croak ("%s: %s is not a property list (must be even number of elements)", func, svname); + + while (extracount--) + *l++ = *extra++; + + for (i = 0; i < len; i += 2) + { + cl_context_properties t = SvIV (*av_fetch (av, i , 0)); + SV *p_sv = *av_fetch (av, i + 1, 0); + cl_context_properties v = SvIV (p_sv); // code below can override + + switch (t) + { + case CL_GLX_DISPLAY_KHR: + if (!SvOK (p_sv)) + { + void *func = glsym ("_glXGetCurrentDisplay"); + if (func) + v = (cl_context_properties)((void *(*)(void))func)(); + } + break; + + case CL_GL_CONTEXT_KHR: + if (!SvOK (p_sv)) + { + void *func = glsym ("_glXGetCurrentContext"); + if (func) + v = (cl_context_properties)((void *(*)(void))func)(); + } + break; + + default: + /* unknown property, treat as int */ + break; + } + + *l++ = t; + *l++ = v; + } + + *l = 0; + + return p; + } + + croak ("%s: %s is not a property list (either undef or [type => value, ...])", func, svname); +} + +/*****************************************************************************/ + static size_t img_row_pitch (cl_mem img) { @@ -142,22 +281,32 @@ } static cl_event * -event_list (SV **items, int count) +event_list (SV **items, cl_uint *rcount) { + cl_uint count = *rcount; + if (!count) return 0; cl_event *list = tmpbuf (sizeof (cl_event) * count); + int i = 0; + + do + { + --count; + if (SvOK (items [count])) + list [i++] = SvPTROBJ ("clEnqueue", "wait_events", items [count], "OpenCL::Event"); + } + while (count); - while (count--) - list [count] = SvPTROBJ ("clEnqueue", "wait_events", items [count], "OpenCL::Event"); + *rcount = i; - return list; + return i ? list : 0; } #define EVENT_LIST(items,count) \ cl_uint event_list_count = (count); \ - cl_event *event_list_ptr = event_list (&ST (items), event_list_count) + cl_event *event_list_ptr = event_list (&ST (items), &event_list_count) #define INFO(class) \ { \ @@ -177,29 +326,31 @@ BOOT: { - HV *stash = gv_stashpv ("OpenCL", 1); - static const ivstr *civ, const_iv[] = { - { sizeof (cl_char ), "SIZEOF_CHAR" }, - { sizeof (cl_uchar ), "SIZEOF_UCHAR" }, - { sizeof (cl_short ), "SIZEOF_SHORT" }, - { sizeof (cl_ushort), "SIZEOF_USHORT" }, - { sizeof (cl_int ), "SIZEOF_INT" }, - { sizeof (cl_uint ), "SIZEOF_UINT" }, - { sizeof (cl_long ), "SIZEOF_LONG" }, - { sizeof (cl_ulong ), "SIZEOF_ULONG" }, - { sizeof (cl_half ), "SIZEOF_HALF" }, - { sizeof (cl_float ), "SIZEOF_FLOAT" }, - { sizeof (cl_double), "SIZEOF_DOUBLE" }, + HV *stash = gv_stashpv ("OpenCL", 1); + static const ivstr *civ, const_iv[] = { + { sizeof (cl_char ), "SIZEOF_CHAR" }, + { sizeof (cl_uchar ), "SIZEOF_UCHAR" }, + { sizeof (cl_short ), "SIZEOF_SHORT" }, + { sizeof (cl_ushort), "SIZEOF_USHORT" }, + { sizeof (cl_int ), "SIZEOF_INT" }, + { sizeof (cl_uint ), "SIZEOF_UINT" }, + { sizeof (cl_long ), "SIZEOF_LONG" }, + { sizeof (cl_ulong ), "SIZEOF_ULONG" }, + { sizeof (cl_half ), "SIZEOF_HALF" }, + { sizeof (cl_float ), "SIZEOF_FLOAT" }, + { sizeof (cl_double), "SIZEOF_DOUBLE" }, #include "constiv.h" - }; - for (civ = const_iv + sizeof (const_iv) / sizeof (const_iv [0]); civ > const_iv; civ--) - newCONSTSUB (stash, (char *)civ[-1].name, newSViv (civ[-1].iv)); + }; + for (civ = const_iv + sizeof (const_iv) / sizeof (const_iv [0]); civ > const_iv; civ--) + newCONSTSUB (stash, (char *)civ[-1].name, newSViv (civ[-1].iv)); } cl_int errno () CODE: - errno = res; + RETVAL = res; + OUTPUT: + RETVAL const char * err2str (cl_int err) @@ -223,16 +374,20 @@ PUSHs (NEW_MORTAL_OBJ ("OpenCL::Platform", list [i])); void -context_from_type (FUTURE properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) +context_from_type (cl_context_properties *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) PPCODE: - NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (0, type, 0, 0, &res)); + NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (properties, type, 0, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::Context", ctx); +#if 0 + void -context (FUTURE properties, FUTURE devices, FUTURE notify = 0) +context (cl_context_properties *properties = 0, FUTURE devices, FUTURE notify = 0) PPCODE: /* der Gipfel der Kunst */ +#endif + void wait_for_events (...) CODE: @@ -248,6 +403,13 @@ PPCODE: INFO (Platform) +void +unload_compiler (OpenCL::Platform self) + CODE: +#if CL_VERSION_1_2 + clUnloadPlatformCompiler (self); +#endif + #BEGIN:platform void @@ -285,10 +447,10 @@ PUSHs (sv_setref_pv (sv_newmortal (), "OpenCL::Device", list [i])); void -context (OpenCL::Platform self, FUTURE properties, SV *devices, FUTURE notify = 0) +context (OpenCL::Platform self, cl_context_properties *properties = 0, SV *devices, FUTURE notify = 0) PPCODE: if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV) - croak ("OpenCL::Platform argument 'device' must be an arrayref with device objects, in call"); + croak ("OpenCL::Platform::context argument 'device' must be an arrayref with device objects, in call"); AV *av = (AV *)SvRV (devices); cl_uint num_devices = av_len (av) + 1; @@ -298,13 +460,14 @@ for (i = num_devices; i--; ) device_list [i] = SvPTROBJ ("clCreateContext", "devices", *av_fetch (av, i, 0), "OpenCL::Device"); - NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (0, num_devices, device_list, 0, 0, &res)); + NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (properties, num_devices, device_list, 0, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::Context", ctx); void -context_from_type (OpenCL::Platform self, FUTURE properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) +context_from_type (OpenCL::Platform self, SV *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) PPCODE: - cl_context_properties props[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self, 0 }; + cl_context_properties extra[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self }; + cl_context_properties *props = SvCONTEXTPROPERTIES ("OpenCL::Platform::context_from_type", "properties", properties, extra, 2); NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (props, type, 0, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::Context", ctx); @@ -355,7 +518,7 @@ native_vector_width_float = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT native_vector_width_double = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE native_vector_width_half = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF - reference_count_ext = CL_DEVICE_REFERENCE_COUNT_EXT + reference_count_ext = CL_DEVICE_REFERENCE_COUNT_EXT PPCODE: cl_uint value [1]; NEED_SUCCESS (GetDeviceInfo, (self, ix, sizeof (value), value, 0)); @@ -567,7 +730,7 @@ buffer (OpenCL::Context self, cl_mem_flags flags, size_t len) PPCODE: if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) - croak ("clCreateBuffer: cannot use/copy host ptr when no data is given, use $context->buffer_sv instead?"); + croak ("OpenCL::Context::buffer: cannot use/copy host ptr when no data is given, use $context->buffer_sv instead?"); NEED_SUCCESS_ARG (cl_mem mem, CreateBuffer, (self, flags, len, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::BufferObj", mem); @@ -578,17 +741,52 @@ STRLEN len; char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0; if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) - croak ("clCreateBuffer: have to specify use or copy host ptr when buffer data is given, use $context->buffer instead?"); + croak ("OpenCL::Context::buffer_sv: you have to specify use or copy host ptr when buffer data is given, use $context->buffer instead?"); NEED_SUCCESS_ARG (cl_mem mem, CreateBuffer, (self, flags, len, ptr, &res)); XPUSH_NEW_OBJ ("OpenCL::BufferObj", mem); +#if CL_VERSION_1_2 + +void +image (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, cl_mem_object_type type, size_t width, size_t height, size_t depth, size_t array_size = 0, size_t row_pitch = 0, size_t slice_pitch = 0, cl_uint num_mip_level = 0, cl_uint num_samples = 0, SV *data = &PL_sv_undef) + PPCODE: + STRLEN len; + char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0; + const cl_image_format format = { channel_order, channel_type }; + const cl_image_desc desc = { + type, + width, height, depth, + array_size, row_pitch, slice_pitch, + num_mip_level, num_samples, + type == CL_MEM_OBJECT_IMAGE1D_BUFFER ? (cl_mem)SvPTROBJ ("OpenCL::Context::Image", "data", data, "OpenCL::Buffer") : 0 + }; + NEED_SUCCESS_ARG (cl_mem mem, CreateImage, (self, flags, &format, &desc, ptr, &res)); + char *klass = "OpenCL::Image"; + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D_BUFFER: klass = "OpenCL::Image1DBuffer"; break; + case CL_MEM_OBJECT_IMAGE1D: klass = "OpenCL::Image1D"; break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: klass = "OpenCL::Image2DArray"; break; + case CL_MEM_OBJECT_IMAGE2D: klass = "OpenCL::Image2D"; break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: klass = "OpenCL::Image2DArray"; break; + case CL_MEM_OBJECT_IMAGE3D: klass = "OpenCL::Image3D"; break; + } + XPUSH_NEW_OBJ (klass, mem); + +#endif + void image2d (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, size_t row_pitch = 0, SV *data = &PL_sv_undef) PPCODE: STRLEN len; char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0; const cl_image_format format = { channel_order, channel_type }; +#if PREFER_1_1 NEED_SUCCESS_ARG (cl_mem mem, CreateImage2D, (self, flags, &format, width, height, row_pitch, ptr, &res)); +#else + const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, row_pitch, 0, 0, 0, 0 }; + NEED_SUCCESS_ARG (cl_mem mem, CreateImage, (self, flags, &format, &desc, ptr, &res)); +#endif XPUSH_NEW_OBJ ("OpenCL::Image2D", mem); void @@ -597,9 +795,73 @@ STRLEN len; char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0; const cl_image_format format = { channel_order, channel_type }; +#if PREFER_1_1 NEED_SUCCESS_ARG (cl_mem mem, CreateImage3D, (self, flags, &format, width, height, depth, row_pitch, slice_pitch, ptr, &res)); +#else + const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE3D, width, height, depth, 0, row_pitch, slice_pitch, 0, 0, 0 }; + NEED_SUCCESS_ARG (cl_mem mem, CreateImage, (self, flags, &format, &desc, ptr, &res)); +#endif XPUSH_NEW_OBJ ("OpenCL::Image3D", mem); +#if cl_apple_gl_sharing || cl_khr_gl_sharing + +void +gl_buffer (OpenCL::Context self, cl_mem_flags flags, cl_GLuint bufobj) + PPCODE: + NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLBuffer, (self, flags, bufobj, &res)); + XPUSH_NEW_OBJ ("OpenCL::BufferObj", mem); + +void +gl_renderbuffer (OpenCL::Context self, cl_mem_flags flags, cl_GLuint renderbuffer) + PPCODE: + NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLRenderbuffer, (self, flags, renderbuffer, &res)); + XPUSH_NEW_OBJ ("OpenCL::Image2D", mem); + +#if CL_VERSION_1_2 + +void +gl_texture (OpenCL::Context self, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture) + ALIAS: + PPCODE: + NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLTexture, (self, flags, target, miplevel, texture, &res)); + cl_gl_object_type type; + NEED_SUCCESS (GetGLObjectInfo, (mem, &type, 0)); // TODO: use target instead? + char *klass = "OpenCL::Memory"; + switch (type) + { + case CL_GL_OBJECT_TEXTURE_BUFFER: klass = "OpenCL::Image1DBuffer"; break; + case CL_GL_OBJECT_TEXTURE1D: klass = "OpenCL::Image1D"; break; + case CL_GL_OBJECT_TEXTURE1D_ARRAY: klass = "OpenCL::Image2DArray"; break; + case CL_GL_OBJECT_TEXTURE2D: klass = "OpenCL::Image2D"; break; + case CL_GL_OBJECT_TEXTURE2D_ARRAY: klass = "OpenCL::Image2DArray"; break; + case CL_GL_OBJECT_TEXTURE3D: klass = "OpenCL::Image3D"; break; + } + XPUSH_NEW_OBJ (klass, mem); + +#endif + +void +gl_texture2d (OpenCL::Context self, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture) + PPCODE: +#if PREFER_1_1 + NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLTexture2D, (self, flags, target, miplevel, texture, &res)); +#else + NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLTexture , (self, flags, target, miplevel, texture, &res)); +#endif + XPUSH_NEW_OBJ ("OpenCL::Image2D", mem); + +void +gl_texture3d (OpenCL::Context self, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture) + PPCODE: +#if PREFER_1_1 + NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLTexture3D, (self, flags, target, miplevel, texture, &res)); +#else + NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLTexture , (self, flags, target, miplevel, texture, &res)); +#endif + XPUSH_NEW_OBJ ("OpenCL::Image3D", mem); + +#endif + void supported_image_formats (OpenCL::Context self, cl_mem_flags flags, cl_mem_object_type image_type) PPCODE: @@ -711,12 +973,55 @@ char *ptr = SvPVbyte (data, len); EVENT_LIST (5, items - 5); - NEED_SUCCESS (EnqueueReadBuffer, (self, mem, blocking, offset, len, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + NEED_SUCCESS (EnqueueWriteBuffer, (self, mem, blocking, offset, len, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); + +#if CL_VERSION_1_2 + +void +enqueue_fill_buffer (OpenCL::Queue self, OpenCL::Buffer mem, SV *data, size_t offset, size_t size, ...) + PPCODE: + cl_event ev = 0; + STRLEN len; + char *ptr = SvPVbyte (data, len); + EVENT_LIST (5, items - 5); + + NEED_SUCCESS (EnqueueFillBuffer, (self, mem, ptr, len, offset, size, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); void +enqueue_fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...) + PPCODE: + cl_event ev = 0; + STRLEN len; + const size_t origin [3] = { x, y, z }; + const size_t region [3] = { width, height, depth }; + EVENT_LIST (12, items - 12); + + const cl_float c_f [4] = { r, g, b, a }; + const cl_uint c_u [4] = { r, g, b, a }; + const cl_int c_s [4] = { r, g, b, a }; + const void *c_fus [3] = { &c_f, &c_u, &c_s }; + static const char fus [] = { 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 1, 1, 0, 0 }; + cl_image_format format; + NEED_SUCCESS (clGetImageInfo, (img, CL_IMAGE_FORMAT, sizeof (format), &format, 0)); + assert (sizeof (fus) == CL_FLOAT + 1 - CL_SNORM_INT8); + if (format.image_channel_data_type < CL_SNORM_INT8 || CL_FLOAT < format.image_channel_data_type) + croak ("enqueue_fill_image: image has unsupported channel type, only opencl 1.2 channel types supported."); + + NEED_SUCCESS (EnqueueFillImage, (self, img, c_fus [fus [format.image_channel_data_type]], + origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); + +#endif + +void enqueue_copy_buffer (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...) PPCODE: cl_event ev = 0; @@ -787,7 +1092,7 @@ if (len < min_len) croak ("clEnqueueWriteImage: data string is shorter than what would be transferred"); - NEED_SUCCESS (EnqueueWriteBufferRect, (self, buf, blocking, buf_origin, host_origin, region, buf_row_pitch, buf_slice_pitch, host_row_pitch, host_slice_pitch, SvPVX (data), event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + NEED_SUCCESS (EnqueueWriteBufferRect, (self, buf, blocking, buf_origin, host_origin, region, buf_row_pitch, buf_slice_pitch, host_row_pitch, host_slice_pitch, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); @@ -852,7 +1157,7 @@ if (len < min_len) croak ("clEnqueueWriteImage: data string is shorter than what would be transferred"); - NEED_SUCCESS (EnqueueWriteImage, (self, dst, blocking, dst_origin, region, row_pitch, slice_pitch, SvPVX (data), event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + NEED_SUCCESS (EnqueueWriteImage, (self, dst, blocking, dst_origin, region, row_pitch, slice_pitch, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); @@ -944,7 +1249,7 @@ if (SvOK (local_work_size)) { - if (SvOK (local_work_size) && !SvROK (local_work_size) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) + if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) croak ("clEnqueueNDRangeKernel: global_work_size must be undef or an array reference"); if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) @@ -960,23 +1265,91 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); +#if cl_apple_gl_sharing || cl_khr_gl_sharing + void -enqueue_marker (OpenCL::Queue self) +enqueue_acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) + ALIAS: + enqueue_release_gl_objects = 1 PPCODE: - cl_event ev; - NEED_SUCCESS (EnqueueMarker, (self, &ev)); - XPUSH_NEW_OBJ ("OpenCL::Event", ev); + if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV) + croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call"); + + cl_event ev = 0; + EVENT_LIST (2, items - 2); + AV *av = (AV *)SvRV (objects); + cl_uint num_objects = av_len (av) + 1; + cl_mem *object_list = tmpbuf (sizeof (cl_mem) * num_objects); + int i; + + for (i = num_objects; i--; ) + object_list [i] = SvPTROBJ ("OpenCL::Queue::enqueue_acquire/release_gl_objects", "objects", *av_fetch (av, i, 0), "OpenCL::Memory"); + + if (ix) + NEED_SUCCESS (EnqueueReleaseGLObjects, (self, num_objects, object_list, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + else + NEED_SUCCESS (EnqueueAcquireGLObjects, (self, num_objects, object_list, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); + +#endif void enqueue_wait_for_events (OpenCL::Queue self, ...) CODE: EVENT_LIST (1, items - 1); +#if PREFER_1_1 NEED_SUCCESS (EnqueueWaitForEvents, (self, event_list_count, event_list_ptr)); +#else + NEED_SUCCESS (EnqueueBarrierWithWaitList, (self, event_list_count, event_list_ptr, 0)); +#endif void -enqueue_barrier (OpenCL::Queue self) - CODE: - NEED_SUCCESS (EnqueueBarrier, (self)); +enqueue_marker (OpenCL::Queue self, ...) + PPCODE: + cl_event ev = 0; + EVENT_LIST (1, items - 1); +#if PREFER_1_1 + if (!event_list_count) + NEED_SUCCESS (EnqueueMarker, (self, GIMME_V != G_VOID ? &ev : 0)); + else +#if CL_VERSION_1_2 + NEED_SUCCESS (EnqueueMarkerWithWaitList, (self, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); +#else + NEED_SUCCESS (EnqueueWaitForEvents, (self, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); // also a barrier +#endif +#else + NEED_SUCCESS (EnqueueMarkerWithWaitList, (self, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); +#endif + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); + +void +enqueue_barrier (OpenCL::Queue self, ...) + PPCODE: + cl_event ev = 0; + EVENT_LIST (1, items - 1); +#if PREFER_1_1 + if (!event_list_count && GIMME_V == G_VOID) + NEED_SUCCESS (EnqueueBarrier, (self)); + else +#if CL_VERSION_1_2 + NEED_SUCCESS (EnqueueBarrierWithWaitList, (self, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); +#else + { + if (event_list_count) + NEED_SUCCESS (EnqueueWaitForEvents, (self, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (GIMME_V != G_VOID) + NEED_SUCCESS (EnqueueMarker, (self, &ev)); + } +#endif +#else + NEED_SUCCESS (EnqueueBarrierWithWaitList, (self, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); +#endif + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); void flush (OpenCL::Queue self) @@ -1129,6 +1502,20 @@ #END:mem +#if cl_apple_gl_sharing || cl_khr_gl_sharing + +void +gl_object_info (OpenCL::Memory self) + PPCODE: + cl_gl_object_type type; + cl_GLuint name; + NEED_SUCCESS (GetGLObjectInfo, (self, &type, &name)); + EXTEND (SP, 2); + PUSHs (sv_2mortal (newSVuv (type))); + PUSHs (sv_2mortal (newSVuv (name))); + +#endif + MODULE = OpenCL PACKAGE = OpenCL::BufferObj void @@ -1169,6 +1556,32 @@ #END:image +#if cl_apple_gl_sharing || cl_khr_gl_sharing + +#BEGIN:gl_texture + +void +target (OpenCL::Image self) + PPCODE: + cl_GLenum value [1]; + NEED_SUCCESS (GetGLTextureInfo, (self, CL_GL_TEXTURE_TARGET, sizeof (value), value, 0)); + EXTEND (SP, 1); + const int i = 0; + PUSHs (sv_2mortal (newSVuv (value [i]))); + +void +gl_mipmap_level (OpenCL::Image self) + PPCODE: + cl_GLint value [1]; + NEED_SUCCESS (GetGLTextureInfo, (self, CL_GL_MIPMAP_LEVEL, sizeof (value), value, 0)); + EXTEND (SP, 1); + const int i = 0; + PUSHs (sv_2mortal (newSViv (value [i]))); + +#END:gl_texture + +#endif + MODULE = OpenCL PACKAGE = OpenCL::Sampler void @@ -1291,6 +1704,19 @@ XPUSH_NEW_OBJ ("OpenCL::Kernel", kernel); void +kernels_in_program (OpenCL::Program program) + PPCODE: + cl_uint num_kernels; + NEED_SUCCESS (CreateKernelsInProgram, (program, 0, 0, &num_kernels)); + cl_kernel *kernels = tmpbuf (sizeof (cl_kernel) * num_kernels); + NEED_SUCCESS (CreateKernelsInProgram, (program, num_kernels, kernels, 0)); + + int i; + EXTEND (SP, num_kernels); + for (i = 0; i < num_kernels; ++i) + PUSHs (NEW_MORTAL_OBJ ("OpenCL::Kernel", kernels [i])); + +void info (OpenCL::Program self, cl_program_info name) PPCODE: INFO (Program) @@ -1316,7 +1742,7 @@ SvUPGRADE (sv, SVt_PV); SvPOK_only (sv); SvCUR_set (sv, sizes [i]); - ptrs [i] = SvPVX (sv); + ptrs [i] = (void *)SvPVX (sv); PUSHs (sv); } @@ -1476,6 +1902,11 @@ clSetKernelArg (self, idx, sizeof (value), &value); void +set_local (OpenCL::Kernel self, cl_uint idx, size_t size) + CODE: + clSetKernelArg (self, idx, size, 0); + +void set_event (OpenCL::Kernel self, cl_uint idx, OpenCL::Event value) CODE: clSetKernelArg (self, idx, sizeof (value), &value);