--- OpenCL/OpenCL.xs 2012/04/19 13:49:33 1.28 +++ OpenCL/OpenCL.xs 2012/04/21 18:49:21 1.37 @@ -2,6 +2,20 @@ #include "perl.h" #include "XSUB.h" +#ifdef I_DLFCN + #include +#endif + +// how stupid is that, the 1.2 header files define CL_VERSION_1_1, +// but then fail to define the api functions unless you ALSO define +// this. This breaks 100% of the opencl 1.1 apps, for what reason? +// after all, the functions are deprecated, not removed. +// in addition, you cannot test for this in any future-proof way. +// each time a new opencl version comes out, you need to make a new +// release. +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#define CL_USE_DEPRECATED_OPENCL_1_2_APIS /* just guessing, you stupid idiots */ + #ifdef __APPLE__ #include #else @@ -33,6 +47,51 @@ /*****************************************************************************/ +// name must include a leading underscore +// all of this horrors would be unneceesary if somebody wrote a proper OpenGL module +// for perl. doh. +static void * +glsym (const char *name) +{ + void *fun = 0; + + #if defined I_DLFCN && defined RTLD_DEFAULT + fun = dlsym (RTLD_DEFAULT, name + 1); + if (!fun) fun = dlsym (RTLD_DEFAULT, name); + + if (!fun) + { + static void *libgl; + static const char *glso[] = { + "libGL.so.1", + "libGL.so.3", + "libGL.so.4.0", + "libGL.so", + "/usr/lib/libGL.so", + "/usr/X11R6/lib/libGL.1.dylib" + }; + int i; + + for (i = 0; !libgl && i < sizeof (glso) / sizeof (glso [0]); ++i) + { + libgl = dlopen (glso [i], RTLD_LAZY); + if (libgl) + break; + } + + if (libgl) + { + fun = dlsym (libgl, name + 1); + if (!fun) fun = dlsym (libgl, name); + } + } + #endif + + return fun; +} + +/*****************************************************************************/ + /* up to two temporary buffers */ static void * tmpbuf (size_t size) @@ -42,7 +101,7 @@ static void *buf [buffers]; static size_t len [buffers]; - idx = ++idx % buffers; + idx = (idx + 1) % buffers; if (len [idx] < size) { @@ -156,18 +215,34 @@ while (extracount--) *l++ = *extra++; - for (i = 0; i < len; ++i) + for (i = 0; i < len; i += 2) { - cl_context_properties t = SvIV (*av_fetch (av, i, 0)); - cl_context_properties v; - - ++i; + cl_context_properties t = SvIV (*av_fetch (av, i , 0)); + SV *p_sv = *av_fetch (av, i + 1, 0); + cl_context_properties v = SvIV (p_sv); // code below can override switch (t) { + case CL_GLX_DISPLAY_KHR: + if (!SvOK (p_sv)) + { + void *func = glsym ("_glXGetCurrentDisplay"); + if (func) + v = (cl_context_properties)((void *(*)(void))func)(); + } + break; + + case CL_GL_CONTEXT_KHR: + if (!SvOK (p_sv)) + { + void *func = glsym ("_glXGetCurrentContext"); + if (func) + v = (cl_context_properties)((void *(*)(void))func)(); + } + break; + default: /* unknown property, treat as int */ - v = SvIV (*av_fetch (av, i, 0)); break; } @@ -194,22 +269,32 @@ } static cl_event * -event_list (SV **items, int count) +event_list (SV **items, cl_uint *rcount) { + cl_uint count = *rcount; + if (!count) return 0; cl_event *list = tmpbuf (sizeof (cl_event) * count); + int i = 0; + + do + { + --count; + if (SvOK (items [count])) + list [i++] = SvPTROBJ ("clEnqueue", "wait_events", items [count], "OpenCL::Event"); + } + while (count); - while (count--) - list [count] = SvPTROBJ ("clEnqueue", "wait_events", items [count], "OpenCL::Event"); + *rcount = i; - return list; + return i ? list : 0; } #define EVENT_LIST(items,count) \ cl_uint event_list_count = (count); \ - cl_event *event_list_ptr = event_list (&ST (items), event_list_count) + cl_event *event_list_ptr = event_list (&ST (items), &event_list_count) #define INFO(class) \ { \ @@ -251,7 +336,9 @@ cl_int errno () CODE: - errno = res; + RETVAL = res; + OUTPUT: + RETVAL const char * err2str (cl_int err) @@ -280,11 +367,15 @@ NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (properties, type, 0, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::Context", ctx); +#if 0 + void context (cl_context_properties *properties = 0, FUTURE devices, FUTURE notify = 0) PPCODE: /* der Gipfel der Kunst */ +#endif + void wait_for_events (...) CODE: @@ -635,6 +726,8 @@ NEED_SUCCESS_ARG (cl_mem mem, CreateBuffer, (self, flags, len, ptr, &res)); XPUSH_NEW_OBJ ("OpenCL::BufferObj", mem); +#if !defined CL_VERSION_1_2 || defined CL_USE_DEPRECATED_OPENCL_1_1_APIS + void image2d (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, size_t row_pitch = 0, SV *data = &PL_sv_undef) PPCODE: @@ -653,6 +746,8 @@ NEED_SUCCESS_ARG (cl_mem mem, CreateImage3D, (self, flags, &format, width, height, depth, row_pitch, slice_pitch, ptr, &res)); XPUSH_NEW_OBJ ("OpenCL::Image3D", mem); +#endif + #if cl_apple_gl_sharing || cl_khr_gl_sharing void @@ -661,6 +756,8 @@ NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLBuffer, (self, flags, bufobj, &res)); XPUSH_NEW_OBJ ("OpenCL::BufferObj", mem); +#if !defined CL_VERSION_1_2 || defined CL_USE_DEPRECATED_OPENCL_1_1_APIS + void gl_texture2d (OpenCL::Context self, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture) PPCODE: @@ -673,6 +770,8 @@ NEED_SUCCESS_ARG (cl_mem mem, CreateFromGLTexture3D, (self, flags, target, miplevel, texture, &res)); XPUSH_NEW_OBJ ("OpenCL::Image3D", mem); +#endif + void gl_renderbuffer (OpenCL::Context self, cl_mem_flags flags, cl_GLuint renderbuffer) PPCODE: @@ -792,7 +891,7 @@ char *ptr = SvPVbyte (data, len); EVENT_LIST (5, items - 5); - NEED_SUCCESS (EnqueueReadBuffer, (self, mem, blocking, offset, len, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + NEED_SUCCESS (EnqueueWriteBuffer, (self, mem, blocking, offset, len, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); @@ -868,7 +967,7 @@ if (len < min_len) croak ("clEnqueueWriteImage: data string is shorter than what would be transferred"); - NEED_SUCCESS (EnqueueWriteBufferRect, (self, buf, blocking, buf_origin, host_origin, region, buf_row_pitch, buf_slice_pitch, host_row_pitch, host_slice_pitch, SvPVX (data), event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + NEED_SUCCESS (EnqueueWriteBufferRect, (self, buf, blocking, buf_origin, host_origin, region, buf_row_pitch, buf_slice_pitch, host_row_pitch, host_slice_pitch, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); @@ -933,7 +1032,7 @@ if (len < min_len) croak ("clEnqueueWriteImage: data string is shorter than what would be transferred"); - NEED_SUCCESS (EnqueueWriteImage, (self, dst, blocking, dst_origin, region, row_pitch, slice_pitch, SvPVX (data), event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + NEED_SUCCESS (EnqueueWriteImage, (self, dst, blocking, dst_origin, region, row_pitch, slice_pitch, ptr, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); @@ -1025,7 +1124,7 @@ if (SvOK (local_work_size)) { - if (SvOK (local_work_size) && !SvROK (local_work_size) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) + if ((SvOK (local_work_size) && !SvROK (local_work_size)) || SvTYPE (SvRV (local_work_size)) != SVt_PVAV) croak ("clEnqueueNDRangeKernel: global_work_size must be undef or an array reference"); if (AvFILLp (SvRV (local_work_size)) + 1 != gws_len) @@ -1047,7 +1146,7 @@ enqueue_acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) ALIAS: enqueue_release_gl_objects = 1 - CODE: + PPCODE: if (!SvROK (objects) || SvTYPE (SvRV (objects)) != SVt_PVAV) croak ("OpenCL::Queue::enqueue_acquire/release_gl_objects argument 'objects' must be an arrayref with memory objects, in call"); @@ -1071,6 +1170,8 @@ #endif +#if !defined CL_VERSION_1_2 || defined CL_USE_DEPRECATED_OPENCL_1_1_APIS + void enqueue_marker (OpenCL::Queue self) PPCODE: @@ -1089,6 +1190,8 @@ CODE: NEED_SUCCESS (EnqueueBarrier, (self)); +#endif + void flush (OpenCL::Queue self) CODE: @@ -1247,7 +1350,7 @@ PPCODE: cl_gl_object_type type; cl_GLuint name; - NEED_SUCCESS (clGetGLObjectInfo, (self, &type, &name)); + NEED_SUCCESS (GetGLObjectInfo, (self, &type, &name)); EXTEND (SP, 2); PUSHs (sv_2mortal (newSVuv (type))); PUSHs (sv_2mortal (newSVuv (name))); @@ -1302,7 +1405,7 @@ target (OpenCL::Image self) PPCODE: cl_GLenum value [1]; - NEED_SUCCESS (GetGlTextureInfo, (self, CL_GL_TEXTURE_TARGET, sizeof (value), value, 0)); + NEED_SUCCESS (GetGLTextureInfo, (self, CL_GL_TEXTURE_TARGET, sizeof (value), value, 0)); EXTEND (SP, 1); const int i = 0; PUSHs (sv_2mortal (newSVuv (value [i]))); @@ -1311,7 +1414,7 @@ gl_mipmap_level (OpenCL::Image self) PPCODE: cl_GLint value [1]; - NEED_SUCCESS (GetGlTextureInfo, (self, CL_GL_MIPMAP_LEVEL, sizeof (value), value, 0)); + NEED_SUCCESS (GetGLTextureInfo, (self, CL_GL_MIPMAP_LEVEL, sizeof (value), value, 0)); EXTEND (SP, 1); const int i = 0; PUSHs (sv_2mortal (newSViv (value [i]))); @@ -1467,7 +1570,7 @@ SvUPGRADE (sv, SVt_PV); SvPOK_only (sv); SvCUR_set (sv, sizes [i]); - ptrs [i] = SvPVX (sv); + ptrs [i] = (void *)SvPVX (sv); PUSHs (sv); } @@ -1627,6 +1730,11 @@ clSetKernelArg (self, idx, sizeof (value), &value); void +set_local (OpenCL::Kernel self, cl_uint idx, size_t size) + CODE: + clSetKernelArg (self, idx, size, 0); + +void set_event (OpenCL::Kernel self, cl_uint idx, OpenCL::Event value) CODE: clSetKernelArg (self, idx, sizeof (value), &value);