--- OpenCL/OpenCL.xs 2011/11/15 09:24:40 1.2 +++ OpenCL/OpenCL.xs 2011/11/15 20:38:07 1.3 @@ -9,36 +9,64 @@ typedef cl_context OpenCL__Context; typedef cl_command_queue OpenCL__Queue; typedef cl_mem OpenCL__Memory; +typedef cl_mem OpenCL__Buffer; +typedef cl_mem OpenCL__Image; +typedef cl_mem OpenCL__Image2D; +typedef cl_mem OpenCL__Image3D; +typedef cl_mem OpenCL__Memory_ornull; +typedef cl_mem OpenCL__Buffer_ornull; +typedef cl_mem OpenCL__Image_ornull; +typedef cl_mem OpenCL__Image2D_ornull; +typedef cl_mem OpenCL__Image3D_ornull; typedef cl_sampler OpenCL__Sampler; typedef cl_program OpenCL__Program; typedef cl_kernel OpenCL__Kernel; typedef cl_event OpenCL__Event; -static const struct { +typedef struct +{ IV iv; const char *name; -} cl_error[] = { -#define def_error(name) { (IV)CL_ ## name, # name }, -#include "invalid.h" -}; + #define const_iv(name) { (IV)CL_ ## name, # name }, +} ivstr; static const char * -clstrerror (cl_int res) +iv2str (IV value, const ivstr *base, int count, const char *fallback) { int i; - static char numbuf [32]; + static char strbuf [32]; + + for (i = count; i--; ) + if (base [i].iv == value) + return base [i].name; + + snprintf (strbuf, sizeof (strbuf), fallback, (int)value); + + return strbuf; +} + +static const char * +enum2str (cl_uint value) +{ + static const ivstr enumstr[] = { + #include "enumstr.h" + }; - for (i = sizeof (cl_error) / sizeof (cl_error [0]); i--; ) - if (cl_error [i].iv == res) - return cl_error [i].name; + return iv2str (value, enumstr, sizeof (enumstr) / sizeof (enumstr [0]), "ENUM(0x%04x)"); +} - snprintf (numbuf, sizeof (numbuf), "ERROR(%d)", res); +static const char * +err2str (cl_int err) +{ + static const ivstr errstr[] = { + #include "errstr.h" + }; - return numbuf; + return iv2str (err, errstr, sizeof (errstr) / sizeof (errstr [0]), "ERROR(%d)"); } #define FAIL(name,res) \ - croak ("cl" # name ": %s", clstrerror (res)); + croak ("cl" # name ": %s", err2str (res)); #define NEED_SUCCESS(name,args) \ do { \ @@ -68,6 +96,15 @@ XPUSHs (sv); \ } +static void * +SvPTROBJ (const char *func, const char *svname, SV *sv, const char *pkg) +{ + if (SvROK (sv) && sv_derived_from (sv, pkg)) + return (void *)SvIV (SvRV (sv)); + + croak ("%s: %s is not of type %s", func, svname, pkg); +} + MODULE = OpenCL PACKAGE = OpenCL PROTOTYPES: ENABLE @@ -75,17 +112,29 @@ BOOT: { HV *stash = gv_stashpv ("OpenCL", 1); - static const struct { - const char *name; - IV iv; - } *civ, const_iv[] = { -#define const_iv(name) { # name, (IV)CL_ ## name }, + static const ivstr *civ, const_iv[] = { + { sizeof (cl_char ), "SIZEOF_CHAR" }, + { sizeof (cl_uchar ), "SIZEOF_UCHAR" }, + { sizeof (cl_short ), "SIZEOF_SHORT" }, + { sizeof (cl_ushort), "SIZEOF_USHORT"}, + { sizeof (cl_int ), "SIZEOF_INT" }, + { sizeof (cl_uint ), "SIZEOF_UINT" }, + { sizeof (cl_long ), "SIZEOF_LONG" }, + { sizeof (cl_ulong ), "SIZEOF_ULONG" }, + { sizeof (cl_half ), "SIZEOF_HALF" }, + { sizeof (cl_float ), "SIZEOF_FLOAT" }, #include "constiv.h" }; for (civ = const_iv + sizeof (const_iv) / sizeof (const_iv [0]); civ > const_iv; civ--) newCONSTSUB (stash, (char *)civ[-1].name, newSViv (civ[-1].iv)); } +const char * +err2str (cl_int err) + +const char * +enum2str (cl_uint value) + void platforms () PPCODE: @@ -168,11 +217,6 @@ XPUSH_NEW_OBJ ("OpenCL::Context", ctx); } -void -unload_compiler () - CODE: - NEED_SUCCESS (UnloadCompiler, ()); - MODULE = OpenCL PACKAGE = OpenCL::Device void @@ -223,12 +267,17 @@ PPCODE: { cl_int res; - cl_mem mem = clCreateBuffer (this, flags, len, 0, &res); + cl_mem mem; + + if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) + croak ("clCreateBuffer: cannot use/copy host ptr when no data is given, use $context->buffer_sv instead?"); + + mem = clCreateBuffer (this, flags, len, 0, &res); if (res) FAIL (CreateBuffer, res); - XPUSH_NEW_OBJ ("OpenCL::Memory", mem); + XPUSH_NEW_OBJ ("OpenCL::Buffer", mem); } void @@ -238,12 +287,72 @@ STRLEN len; char *ptr = SvPVbyte (data, len); cl_int res; - cl_mem mem = clCreateBuffer (this, flags, len, ptr, &res); + cl_mem mem; + + if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) + croak ("clCreateBuffer: have to specify use or copy host ptr when buffer data is given, use $context->buffer instead?"); + + mem = clCreateBuffer (this, flags, len, ptr, &res); if (res) FAIL (CreateBuffer, res); - XPUSH_NEW_OBJ ("OpenCL::Memory", mem); + XPUSH_NEW_OBJ ("OpenCL::Buffer", mem); +} + +void +image2d (OpenCL::Context this, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, SV *data) + PPCODE: +{ + STRLEN len; + char *ptr = SvPVbyte (data, len); + const cl_image_format format = { channel_order, channel_type }; + cl_int res; + cl_mem mem = clCreateImage2D (this, flags, &format, width, height, len / height, ptr, &res); + + if (res) + FAIL (CreateImage2D, res); + + XPUSH_NEW_OBJ ("OpenCL::Image2D", mem); +} + +void +image3d (OpenCL::Context this, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, size_t depth, size_t slice_pitch, SV *data) + PPCODE: +{ + STRLEN len; + char *ptr = SvPVbyte (data, len); + const cl_image_format format = { channel_order, channel_type }; + cl_int res; + cl_mem mem = clCreateImage3D (this, flags, &format, width, height, + depth, len / (height * slice_pitch), slice_pitch, ptr, &res); + + if (res) + FAIL (CreateImage3D, res); + + XPUSH_NEW_OBJ ("OpenCL::Image3D", mem); +} + +void +supported_image_formats (OpenCL::Context this, cl_mem_flags flags, cl_mem_object_type image_type) + PPCODE: +{ + cl_uint count; + cl_image_format *list; + int i; + + NEED_SUCCESS (GetSupportedImageFormats, (this, flags, image_type, 0, 0, &count)); + Newx (list, count, cl_image_format); + NEED_SUCCESS (GetSupportedImageFormats, (this, flags, image_type, count, list, 0)); + + EXTEND (SP, count); + for (i = 0; i < count; ++i) + { + AV *av = newAV (); + av_store (av, 1, newSVuv (list [i].image_channel_data_type)); + av_store (av, 0, newSVuv (list [i].image_channel_order)); + PUSHs (sv_2mortal (newRV_noinc ((SV *)av))); + } } void @@ -291,7 +400,7 @@ INFO (CommandQueue) void -enqueue_read_buffer (OpenCL::Queue this, OpenCL::Memory mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...) +enqueue_read_buffer (OpenCL::Queue this, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...) PPCODE: { cl_event ev = 0; @@ -308,7 +417,7 @@ } void -enqueue_write_buffer (OpenCL::Queue this, OpenCL::Memory mem, cl_bool blocking, size_t offset, SV *data, ...) +enqueue_write_buffer (OpenCL::Queue this, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...) PPCODE: { cl_event ev = 0; @@ -323,7 +432,7 @@ } void -enqueue_copy_buffer (OpenCL::Queue this, OpenCL::Memory src, OpenCL::Memory dst, size_t src_offset, size_t dst_offset, size_t len, ...) +enqueue_copy_buffer (OpenCL::Queue this, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...) PPCODE: { cl_event ev = 0; @@ -335,6 +444,124 @@ XPUSH_NEW_OBJ ("OpenCL::Event", ev); } + /*TODO http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueReadBufferRect.html */ + /*TODO http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWriteBufferRect.html */ + +void +enqueue_read_image (OpenCL::Queue this, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) + PPCODE: +{ + cl_event ev = 0; + const size_t src_origin[3] = { src_x, src_y, src_z }; + const size_t region[3] = { width, height, depth }; + size_t len = row_pitch * slice_pitch * depth; + EVENT_LIST (11, items - 11); + + SvUPGRADE (data, SVt_PV); + SvGROW (data, len); + SvPOK_only (data); + SvCUR_set (data, len); + NEED_SUCCESS (EnqueueReadImage, (this, src, blocking, src_origin, region, row_pitch, slice_pitch, SvPVX (data), event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); +} + +void +enqueue_write_image (OpenCL::Queue this, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, SV *data, ...) + PPCODE: +{ + cl_event ev = 0; + const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; + const size_t region[3] = { width, height, depth }; + STRLEN len; + char *ptr = SvPVbyte (data, len); + size_t slice_pitch = len / (row_pitch * height); + EVENT_LIST (11, items - 11); + + NEED_SUCCESS (EnqueueWriteImage, (this, dst, blocking, dst_origin, region, row_pitch, slice_pitch, SvPVX (data), event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); +} + +void +enqueue_copy_buffer_rect (OpenCL::Queue this, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...) + PPCODE: +{ + cl_event ev = 0; + const size_t src_origin[3] = { src_x, src_y, src_z }; + const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; + const size_t region[3] = { width, height, depth }; + EVENT_LIST (16, items - 16); + + NEED_SUCCESS (EnqueueCopyBufferRect, (this, src, dst, src_origin, dst_origin, region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); +} + +void +enqueue_copy_buffer_to_image (OpenCL::Queue this, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) + PPCODE: +{ + cl_event ev = 0; + const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; + const size_t region[3] = { width, height, depth }; + EVENT_LIST (10, items - 10); + + NEED_SUCCESS (EnqueueCopyBufferToImage, (this, src, dst, src_offset, dst_origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); +} + +void +enqueue_copy_image (OpenCL::Queue this, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) + PPCODE: +{ + cl_event ev = 0; + const size_t src_origin[3] = { src_x, src_y, src_z }; + const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; + const size_t region[3] = { width, height, depth }; + EVENT_LIST (12, items - 12); + + NEED_SUCCESS (EnqueueCopyImage, (this, src, dst, src_origin, dst_origin, region, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); +} + +void +enqueue_copy_image_to_buffer (OpenCL::Queue this, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...) + PPCODE: +{ + cl_event ev = 0; + const size_t src_origin[3] = { src_x, src_y, src_z }; + const size_t region[3] = { width, height, depth }; + EVENT_LIST (10, items - 10); + + NEED_SUCCESS (EnqueueCopyImageToBuffer, (this, src, dst, src_origin, region, dst_offset, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); +} + +void +enqueue_task (OpenCL::Queue this, OpenCL::Kernel kernel, ...) + PPCODE: +{ + cl_event ev = 0; + EVENT_LIST (2, items - 2); + + NEED_SUCCESS (EnqueueTask, (this, kernel, event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); + + if (ev) + XPUSH_NEW_OBJ ("OpenCL::Event", ev); +} + + /*TODO http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueNDRangeKernel.html */ + void enqueue_marker (OpenCL::Queue this) PPCODE: @@ -357,6 +584,16 @@ CODE: NEED_SUCCESS (EnqueueBarrier, (this)); +void +flush (OpenCL::Queue this) + CODE: + NEED_SUCCESS (Flush, (this)); + +void +finish (OpenCL::Queue this) + CODE: + NEED_SUCCESS (Finish, (this)); + MODULE = OpenCL PACKAGE = OpenCL::Memory void @@ -440,9 +677,84 @@ INFO (Kernel) void -set_bool (OpenCL::Kernel this, cl_uint idx, cl_bool value) +set_char (OpenCL::Kernel this, cl_uint idx, cl_char value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_uchar (OpenCL::Kernel this, cl_uint idx, cl_uchar value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_short (OpenCL::Kernel this, cl_uint idx, cl_short value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_ushort (OpenCL::Kernel this, cl_uint idx, cl_ushort value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_int (OpenCL::Kernel this, cl_uint idx, cl_int value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_uint (OpenCL::Kernel this, cl_uint idx, cl_uint value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_long (OpenCL::Kernel this, cl_uint idx, cl_long value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_ulong (OpenCL::Kernel this, cl_uint idx, cl_ulong value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_half (OpenCL::Kernel this, cl_uint idx, cl_half value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_float (OpenCL::Kernel this, cl_uint idx, cl_float value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_memory (OpenCL::Kernel this, cl_uint idx, OpenCL::Memory_ornull value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_buffer (OpenCL::Kernel this, cl_uint idx, OpenCL::Buffer_ornull value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_image2d (OpenCL::Kernel this, cl_uint idx, OpenCL::Image2D_ornull value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_image3d (OpenCL::Kernel this, cl_uint idx, OpenCL::Image3D_ornull value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_sampler (OpenCL::Kernel this, cl_uint idx, OpenCL::Sampler value) + CODE: + clSetKernelArg (this, idx, sizeof (value), &value); + +void +set_event (OpenCL::Kernel this, cl_uint idx, OpenCL::Event value) CODE: - clKernelSetArg (this, idx, sizeof (value), &value); + clSetKernelArg (this, idx, sizeof (value), &value); MODULE = OpenCL PACKAGE = OpenCL::Event