--- OpenCL/OpenCL.xs 2012/04/24 22:45:38 1.51 +++ OpenCL/OpenCL.xs 2012/04/25 21:48:45 1.56 @@ -272,50 +272,11 @@ SvCLOBJ (const char *func, const char *svname, SV *sv, const char *pkg) { if (SvROK (sv) && sv_derived_from (sv, pkg)) - { - SV *rv = SvRV (sv); - - if (SvTYPE (rv) == SVt_PVAV) - rv = AvARRAY (rv)[0]; - - return (void *)SvIV (SvRV (sv)); - } + return (void *)SvIV (SvRV (sv)); croak ("%s: %s is not of type %s", func, svname, pkg); } -static void -CLOBJ_push (SV *self, SV *data) -{ - SV *rv = SvRV (self); - - if (SvTYPE (rv) != SVt_PVAV) - { - AV *av = newAV (); - av_push (av, rv); - rv = (SV *)av; - SvRV_set (self, rv); - } - - av_push ((AV *)rv, data); -} - -static SV * -sv_struct (STRLEN size) -{ - SV *sv = newSV (size); - SvPOK_only (sv); - return sv; -} - -static void * -CLOBJ_push_struct (SV *self, STRLEN size) -{ - SV *sv = sv_struct (size); - CLOBJ_push (self, sv); - return SvPVX (sv); -} - /*****************************************************************************/ /* callback stuff */ @@ -350,8 +311,6 @@ { eq_item *item = malloc (sizeof (eq_item)); - printf ("enq(%p,%p,%p,%p,%p)\n", vtbl, cb, data1, data2, data3);//D - item->next = 0; item->vtbl = vtbl; item->cb = cb; @@ -385,15 +344,6 @@ return res; } -#if 0 -static void -mem_free (pTHX_ void *p) -{ - free (p); -} -//SAVEDESTRUCTOR_X (mem_free, item); -#endif - static void eq_poll (void) { @@ -430,6 +380,7 @@ eq_poll (); } +/*****************************************************************************/ /* context notify */ static void @@ -439,10 +390,45 @@ PUSHs (sv_2mortal (newSVpv (data1, 0))); PUSHs (sv_2mortal (newSVpvn (data2, (STRLEN)data3))); PUTBACK; + + free (data1); + free (data2); } static eq_vtbl eq_context_vtbl = { 0, eq_context_push }; +static void CL_CALLBACK +eq_context_notify (const char *msg, const void *pvt, size_t cb, void *user_data) +{ + void *pvt_copy = malloc (cb); + memcpy (pvt_copy, pvt, cb); + eq_enq (&eq_context_vtbl, user_data, strdup (msg), pvt_copy, (void *)cb); +} + +#define CONTEXT_NOTIFY_CALLBACK \ + void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) = context_default_notify; \ + void *user_data = 0; \ + \ + if (SvOK (notify)) \ + { \ + pfn_notify = eq_context_notify; \ + user_data = s_get_cv (notify); \ + } + +static SV * +new_clobj_context (cl_context ctx, void *user_data) +{ + SV *sv = NEW_CLOBJ ("OpenCL::Context", ctx); + + if (user_data) + sv_magicext (SvRV (sv), user_data, PERL_MAGIC_ext, 0, 0, 0); + + return sv; +} + +#define XPUSH_CLOBJ_CONTEXT XPUSHs (new_clobj_context (ctx, user_data)); + +/*****************************************************************************/ /* build/compile/link notify */ static void @@ -499,6 +485,7 @@ thread_create (&id, build_program_thread, arg); } +/*****************************************************************************/ /* event objects */ static void @@ -516,7 +503,7 @@ eq_event_notify (cl_event event, cl_int event_command_exec_status, void *user_data) { clRetainEvent (event); - eq_enq (&eq_event_vtbl, user_data, (void *)event, (void *)event_command_exec_status, 0); + eq_enq (&eq_event_vtbl, user_data, (void *)event, (void *)(IV)event_command_exec_status, 0); } /*****************************************************************************/ @@ -637,20 +624,17 @@ PUSH_CLOBJ ("OpenCL::Platform", list [i]); void -context_from_type (cl_context_properties *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) +context_from_type (cl_context_properties *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef) PPCODE: + CONTEXT_NOTIFY_CALLBACK; NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (properties, type, 0, 0, &res)); - XPUSH_CLOBJ ("OpenCL::Context", ctx); - -#if 0 + XPUSH_CLOBJ_CONTEXT; void -context (cl_context_properties *properties = 0, FUTURE devices, FUTURE notify = 0) +context (FUTURE properties, FUTURE devices, FUTURE notify) PPCODE: /* der Gipfel der Kunst */ -#endif - void wait_for_events (...) CODE: @@ -710,7 +694,7 @@ PUSHs (sv_setref_pv (sv_newmortal (), "OpenCL::Device", list [i])); void -context (OpenCL::Platform self, cl_context_properties *properties, SV *devices, SV *notify = 0) +context (OpenCL::Platform self, cl_context_properties *properties, SV *devices, SV *notify = &PL_sv_undef) PPCODE: if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV) croak ("OpenCL::Platform::context argument 'device' must be an arrayref with device objects, in call"); @@ -723,19 +707,19 @@ for (i = num_devices; i--; ) device_list [i] = SvCLOBJ ("clCreateContext", "devices", *av_fetch (av, i, 0), "OpenCL::Device"); - void (CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *) = context_default_notify; - void *user_data = 0; - + CONTEXT_NOTIFY_CALLBACK; NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (properties, num_devices, device_list, pfn_notify, user_data, &res)); - XPUSH_CLOBJ ("OpenCL::Context", ctx); + XPUSH_CLOBJ_CONTEXT; void -context_from_type (OpenCL::Platform self, SV *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) +context_from_type (OpenCL::Platform self, SV *properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, SV *notify = &PL_sv_undef) PPCODE: cl_context_properties extra[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)self }; cl_context_properties *props = SvCONTEXTPROPERTIES ("OpenCL::Platform::context_from_type", "properties", properties, extra, 2); + + CONTEXT_NOTIFY_CALLBACK; NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (props, type, 0, 0, &res)); - XPUSH_CLOBJ ("OpenCL::Context", ctx); + XPUSH_CLOBJ_CONTEXT; MODULE = OpenCL PACKAGE = OpenCL::Device @@ -1014,7 +998,7 @@ #if CL_VERSION_1_2 void -image (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, cl_mem_object_type type, size_t width, size_t height, size_t depth, size_t array_size = 0, size_t row_pitch = 0, size_t slice_pitch = 0, cl_uint num_mip_level = 0, cl_uint num_samples = 0, SV *data = &PL_sv_undef) +image (OpenCL::Context self, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, cl_mem_object_type type, size_t width, size_t height, size_t depth = 0, size_t array_size = 0, size_t row_pitch = 0, size_t slice_pitch = 0, cl_uint num_mip_level = 0, cl_uint num_samples = 0, SV *data = &PL_sv_undef) PPCODE: STRLEN len; char *ptr = SvOK (data) ? SvPVbyte (data, len) : 0; @@ -1217,7 +1201,9 @@ clReleaseCommandQueue (self); void -enqueue_read_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...) +read_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...) + ALIAS: + enqueue_read_buffer = 0 PPCODE: cl_event ev = 0; EVENT_LIST (6, items - 6); @@ -1232,7 +1218,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_write_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...) +write_buffer (OpenCL::Queue self, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...) + ALIAS: + enqueue_write_buffer = 0 PPCODE: cl_event ev = 0; STRLEN len; @@ -1247,7 +1235,9 @@ #if CL_VERSION_1_2 void -enqueue_fill_buffer (OpenCL::Queue self, OpenCL::Buffer mem, SV *data, size_t offset, size_t size, ...) +fill_buffer (OpenCL::Queue self, OpenCL::Buffer mem, SV *data, size_t offset, size_t size, ...) + ALIAS: + enqueue_fill_buffer = 0 PPCODE: cl_event ev = 0; STRLEN len; @@ -1260,7 +1250,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...) +fill_image (OpenCL::Queue self, OpenCL::Image img, NV r, NV g, NV b, NV a, size_t x, size_t y, size_t z, size_t width, size_t height, size_t depth, ...) + ALIAS: + enqueue_fill_image = 0 PPCODE: cl_event ev = 0; STRLEN len; @@ -1279,7 +1271,7 @@ if (format.image_channel_data_type < CL_SNORM_INT8 || CL_FLOAT < format.image_channel_data_type) croak ("enqueue_fill_image: image has unsupported channel type, only opencl 1.2 channel types supported."); - NEED_SUCCESS (EnqueueFillImage, (self, img, c_fus [fus [format.image_channel_data_type]], + NEED_SUCCESS (EnqueueFillImage, (self, img, c_fus [fus [format.image_channel_data_type - CL_SNORM_INT8]], origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) @@ -1288,7 +1280,9 @@ #endif void -enqueue_copy_buffer (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...) +copy_buffer (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...) + ALIAS: + enqueue_copy_buffer = 0 PPCODE: cl_event ev = 0; EVENT_LIST (6, items - 6); @@ -1299,7 +1293,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_read_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) +read_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) + ALIAS: + enqueue_read_buffer_rect = 0 PPCODE: cl_event ev = 0; const size_t buf_origin [3] = { buf_x , buf_y , buf_z }; @@ -1331,7 +1327,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_write_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) +write_buffer_rect (OpenCL::Queue self, OpenCL::Memory buf, cl_bool blocking, size_t buf_x, size_t buf_y, size_t buf_z, size_t host_x, size_t host_y, size_t host_z, size_t width, size_t height, size_t depth, size_t buf_row_pitch, size_t buf_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, SV *data, ...) + ALIAS: + enqueue_write_buffer_rect = 0 PPCODE: cl_event ev = 0; const size_t buf_origin [3] = { buf_x , buf_y , buf_z }; @@ -1364,7 +1362,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_copy_buffer_rect (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...) +copy_buffer_rect (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...) + ALIAS: + enqueue_copy_buffer_rect = 0 PPCODE: cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; @@ -1378,7 +1378,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_read_image (OpenCL::Queue self, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) +read_image (OpenCL::Queue self, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) + ALIAS: + enqueue_read_image = 0 PPCODE: cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; @@ -1403,7 +1405,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_write_image (OpenCL::Queue self, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) +write_image (OpenCL::Queue self, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) + ALIAS: + enqueue_write_image = 0 PPCODE: cl_event ev = 0; const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; @@ -1429,7 +1433,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_copy_image (OpenCL::Queue self, OpenCL::Image src, OpenCL::Image dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) +copy_image (OpenCL::Queue self, OpenCL::Image src, OpenCL::Image dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) + ALIAS: + enqueue_copy_image = 0 PPCODE: cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; @@ -1443,7 +1449,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_copy_image_to_buffer (OpenCL::Queue self, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...) +copy_image_to_buffer (OpenCL::Queue self, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...) + ALIAS: + enqueue_copy_image_to_buffer = 0 PPCODE: cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; @@ -1456,7 +1464,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_copy_buffer_to_image (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) +copy_buffer_to_image (OpenCL::Queue self, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) + ALIAS: + enqueue_copy_buffer_to_image = 0 PPCODE: cl_event ev = 0; const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; @@ -1469,7 +1479,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_task (OpenCL::Queue self, OpenCL::Kernel kernel, ...) +task (OpenCL::Queue self, OpenCL::Kernel kernel, ...) + ALIAS: + enqueue_task = 0 PPCODE: cl_event ev = 0; EVENT_LIST (2, items - 2); @@ -1480,7 +1492,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_nd_range_kernel (OpenCL::Queue self, OpenCL::Kernel kernel, SV *global_work_offset, SV *global_work_size, SV *local_work_size = &PL_sv_undef, ...) +nd_range_kernel (OpenCL::Queue self, OpenCL::Kernel kernel, SV *global_work_offset, SV *global_work_size, SV *local_work_size = &PL_sv_undef, ...) + ALIAS: + enqueue_nd_range_kernel = 0 PPCODE: cl_event ev = 0; size_t *gwo = 0, *gws, *lws = 0; @@ -1534,7 +1548,9 @@ #if cl_apple_gl_sharing || cl_khr_gl_sharing void -enqueue_acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) +acquire_gl_objects (OpenCL::Queue self, SV *objects, ...) + ALIAS: + enqueue_acquire_gl_objects = 0 ALIAS: enqueue_release_gl_objects = 1 PPCODE: @@ -1562,7 +1578,9 @@ #endif void -enqueue_wait_for_events (OpenCL::Queue self, ...) +wait_for_events (OpenCL::Queue self, ...) + ALIAS: + enqueue_wait_for_events = 0 CODE: EVENT_LIST (1, items - 1); #if PREFER_1_1 @@ -1572,7 +1590,9 @@ #endif void -enqueue_marker (OpenCL::Queue self, ...) +marker (OpenCL::Queue self, ...) + ALIAS: + enqueue_marker = 0 PPCODE: cl_event ev = 0; EVENT_LIST (1, items - 1); @@ -1595,7 +1615,9 @@ XPUSH_CLOBJ ("OpenCL::Event", ev); void -enqueue_barrier (OpenCL::Queue self, ...) +barrier (OpenCL::Queue self, ...) + ALIAS: + enqueue_barrier = 0 PPCODE: cl_event ev = 0; EVENT_LIST (1, items - 1); @@ -2134,6 +2156,67 @@ clReleaseKernel (self); void +setf (OpenCL::Kernel self, const char *format, ...) + CODE: + int i; + for (i = 2; ; ++i) + { + while (*format == ' ') + ++format; + + char type = *format++; + + if (!type) + break; + + if (i >= items) + croak ("OpenCL::Kernel::setf format string too long (not enough arguments)"); + + SV *sv = ST (i); + + union + { + cl_char cc; cl_uchar cC; cl_short cs; cl_ushort cS; + cl_int ci; cl_uint cI; cl_long cl; cl_ulong cL; + cl_half ch; cl_float cf; cl_double cd; + cl_mem cm; + cl_sampler ca; + size_t cz; + cl_event ce; + } arg; + size_t size; + + switch (type) + { + case 'c': arg.cc = SvIV (sv); size = sizeof (arg.cc); break; + case 'C': arg.cC = SvUV (sv); size = sizeof (arg.cC); break; + case 's': arg.cs = SvIV (sv); size = sizeof (arg.cs); break; + case 'S': arg.cS = SvUV (sv); size = sizeof (arg.cS); break; + case 'i': arg.ci = SvIV (sv); size = sizeof (arg.ci); break; + case 'I': arg.cI = SvUV (sv); size = sizeof (arg.cI); break; + case 'l': arg.cl = SvIV (sv); size = sizeof (arg.cl); break; + case 'L': arg.cL = SvUV (sv); size = sizeof (arg.cL); break; + + case 'h': arg.ch = SvUV (sv); size = sizeof (arg.ch); break; + case 'f': arg.cf = SvNV (sv); size = sizeof (arg.cf); break; + case 'd': arg.cd = SvNV (sv); size = sizeof (arg.cd); break; + case 'z': arg.cz = SvUV (sv); size = sizeof (arg.cz); break; + + case 'm': arg.cm = SvCLOBJ ("OpenCL::Kernel::setf", "m", sv, "OpenCL::Memory" ); size = sizeof (arg.cm); break; + case 'a': arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "a", sv, "OpenCL::Sampler"); size = sizeof (arg.ca); break; + case 'e': arg.ca = SvCLOBJ ("OpenCL::Kernel::setf", "e", sv, "OpenCL::Event" ); size = sizeof (arg.ce); break; + + default: + croak ("OpenCL::Kernel::setf format character '%c' not supported", type); + } + + clSetKernelArg (self, i - 2, size, &arg); + } + + if (i != items) + croak ("OpenCL::Kernel::setf format string too short (too many arguments)"); + +void set_char (OpenCL::Kernel self, cl_uint idx, cl_char value) CODE: clSetKernelArg (self, idx, sizeof (value), &value); @@ -2199,12 +2282,10 @@ clSetKernelArg (self, idx, sizeof (value), &value); void -set_image2d (OpenCL::Kernel self, cl_uint idx, OpenCL::Image2D_ornull value) - CODE: - clSetKernelArg (self, idx, sizeof (value), &value); - -void -set_image3d (OpenCL::Kernel self, cl_uint idx, OpenCL::Image3D_ornull value) +set_image (OpenCL::Kernel self, cl_uint idx, OpenCL::Image_ornull value) + ALIAS: + set_image2d = 0 + set_image3d = 0 CODE: clSetKernelArg (self, idx, sizeof (value), &value);