--- OpenCL/OpenCL.xs 2011/11/17 02:10:39 1.7 +++ OpenCL/OpenCL.xs 2011/11/17 04:17:43 1.11 @@ -94,19 +94,24 @@ /*****************************************************************************/ -static cl_int last_error; +static cl_int res; -#define FAIL(name,err) \ - croak ("cl" # name ": %s", err2str (last_error = err)); +#define FAIL(name) \ + croak ("cl" # name ": %s", err2str (res)); #define NEED_SUCCESS(name,args) \ do { \ - cl_int res = cl ## name args; \ + res = cl ## name args; \ \ if (res) \ - FAIL (name, res); \ + FAIL (name); \ } while (0) +#define NEED_SUCCESS_ARG(retdecl, name, args) \ + retdecl = cl ## name args; \ + if (res) \ + FAIL (name); + /*****************************************************************************/ #define NEW_MORTAL_OBJ(class,ptr) sv_setref_pv (sv_newmortal (), class, ptr) @@ -123,6 +128,14 @@ /*****************************************************************************/ +static size_t +img_row_pitch (cl_mem img) +{ + size_t res; + clGetImageInfo (img, CL_IMAGE_ROW_PITCH, sizeof (res), &res, 0); + return res; +} + static cl_event * event_list (SV **items, int count) { @@ -141,10 +154,8 @@ #define INFO(class) \ { \ size_t size; \ - SV *sv; \ - \ NEED_SUCCESS (Get ## class ## Info, (this, name, 0, 0, &size)); \ - sv = sv_2mortal (newSV (size)); \ + SV *sv = sv_2mortal (newSV (size)); \ SvUPGRADE (sv, SVt_PV); \ SvPOK_only (sv); \ SvCUR_set (sv, size); \ @@ -180,7 +191,7 @@ cl_int errno () CODE: - errno = last_error; + errno = res; const char * err2str (cl_int err) @@ -191,7 +202,6 @@ void platforms () PPCODE: -{ cl_platform_id *list; cl_uint count; int i; @@ -203,28 +213,23 @@ EXTEND (SP, count); for (i = 0; i < count; ++i) PUSHs (NEW_MORTAL_OBJ ("OpenCL::Platform", list [i])); -} void context_from_type (FUTURE properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) PPCODE: -{ - cl_int res; - cl_context ctx = clCreateContextFromType (0, type, 0, 0, &res); - - if (res) - FAIL (CreateContextFromType, res); - + NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (0, type, 0, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::Context", ctx); -} + +void +context (FUTURE properties, FUTURE devices, FUTURE notify = 0) + PPCODE: + /* der Gipfel der Kunst */ void wait_for_events (...) CODE: -{ EVENT_LIST (0, items); NEED_SUCCESS (WaitForEvents, (event_list_count, event_list_ptr)); -} PROTOTYPES: DISABLE @@ -238,7 +243,6 @@ void devices (OpenCL::Platform this, cl_device_type type = CL_DEVICE_TYPE_ALL) PPCODE: -{ cl_device_id *list; cl_uint count; int i; @@ -250,21 +254,30 @@ EXTEND (SP, count); for (i = 0; i < count; ++i) PUSHs (sv_setref_pv (sv_newmortal (), "OpenCL::Device", list [i])); -} void -context_from_type (OpenCL::Platform this, FUTURE properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) +context (OpenCL::Platform this, FUTURE properties, SV *devices, FUTURE notify = 0) PPCODE: -{ - cl_int res; - cl_context_properties props[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)this, 0 }; - cl_context ctx = clCreateContextFromType (props, type, 0, 0, &res); + if (!SvROK (devices) || SvTYPE (SvRV (devices)) != SVt_PVAV) + croak ("OpenCL::Platform argument 'device' must be an arrayref with device objects, in call"); + + AV *av = (AV *)SvRV (devices); + cl_uint num_devices = av_len (av) + 1; + cl_device_id *device_list = tmpbuf (sizeof (cl_device_id) * num_devices); + int i; - if (res) - FAIL (CreateContextFromType, res); + for (i = num_devices; i--; ) + device_list [i] = SvPTROBJ ("clCreateContext", "devices", *av_fetch (av, i, 0), "OpenCL::Device"); + NEED_SUCCESS_ARG (cl_context ctx, CreateContext, (0, num_devices, device_list, 0, 0, &res)); + XPUSH_NEW_OBJ ("OpenCL::Context", ctx); + +void +context_from_type (OpenCL::Platform this, FUTURE properties = 0, cl_device_type type = CL_DEVICE_TYPE_DEFAULT, FUTURE notify = 0) + PPCODE: + cl_context_properties props[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)this, 0 }; + NEED_SUCCESS_ARG (cl_context ctx, CreateContextFromType, (props, type, 0, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::Context", ctx); -} MODULE = OpenCL PACKAGE = OpenCL::Device @@ -273,19 +286,6 @@ PPCODE: INFO (Device) -void -context (OpenCL::Device this, FUTURE properties = 0, FUTURE notify = 0) - PPCODE: -{ - cl_int res; - cl_context ctx = clCreateContext (0, 1, &this, 0, 0, &res); - - if (res) - FAIL (CreateContext, res); - - XPUSH_NEW_OBJ ("OpenCL::Context", ctx); -} - MODULE = OpenCL PACKAGE = OpenCL::Context void @@ -301,99 +301,53 @@ void queue (OpenCL::Context this, OpenCL::Device device, cl_command_queue_properties properties = 0) PPCODE: -{ - cl_int res; - cl_command_queue queue = clCreateCommandQueue (this, device, properties, &res); - - if (res) - FAIL (CreateCommandQueue, res); - + NEED_SUCCESS_ARG (cl_command_queue queue, CreateCommandQueue, (this, device, properties, &res)); XPUSH_NEW_OBJ ("OpenCL::Queue", queue); -} void user_event (OpenCL::Context this) PPCODE: -{ - cl_int res; - cl_event ev = clCreateUserEvent (this, &res); - - if (res) - FAIL (CreateUserevent, res); - + NEED_SUCCESS_ARG (cl_event ev, CreateUserEvent, (this, &res)); XPUSH_NEW_OBJ ("OpenCL::UserEvent", ev); -} void buffer (OpenCL::Context this, cl_mem_flags flags, size_t len) PPCODE: -{ - cl_int res; - cl_mem mem; - if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) croak ("clCreateBuffer: cannot use/copy host ptr when no data is given, use $context->buffer_sv instead?"); - mem = clCreateBuffer (this, flags, len, 0, &res); - - if (res) - FAIL (CreateBuffer, res); - + NEED_SUCCESS_ARG (cl_mem mem, CreateBuffer, (this, flags, len, 0, &res)); XPUSH_NEW_OBJ ("OpenCL::Buffer", mem); -} void buffer_sv (OpenCL::Context this, cl_mem_flags flags, SV *data) PPCODE: -{ STRLEN len; char *ptr = SvPVbyte (data, len); - cl_int res; - cl_mem mem; if (!(flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR))) croak ("clCreateBuffer: have to specify use or copy host ptr when buffer data is given, use $context->buffer instead?"); - mem = clCreateBuffer (this, flags, len, ptr, &res); - - if (res) - FAIL (CreateBuffer, res); - + NEED_SUCCESS_ARG (cl_mem mem, CreateBuffer, (this, flags, len, ptr, &res)); XPUSH_NEW_OBJ ("OpenCL::Buffer", mem); -} void -image2d (OpenCL::Context this, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, SV *data) +image2d (OpenCL::Context this, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, size_t row_pitch, SV *data) PPCODE: -{ STRLEN len; char *ptr = SvPVbyte (data, len); const cl_image_format format = { channel_order, channel_type }; - cl_int res; - cl_mem mem = clCreateImage2D (this, flags, &format, width, height, len / height, ptr, &res); - - if (res) - FAIL (CreateImage2D, res); - + NEED_SUCCESS_ARG (cl_mem mem, CreateImage2D, (this, flags, &format, width, height, row_pitch, ptr, &res)); XPUSH_NEW_OBJ ("OpenCL::Image2D", mem); -} void -image3d (OpenCL::Context this, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, size_t depth, size_t slice_pitch, SV *data) +image3d (OpenCL::Context this, cl_mem_flags flags, cl_channel_order channel_order, cl_channel_type channel_type, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data) PPCODE: -{ STRLEN len; char *ptr = SvPVbyte (data, len); const cl_image_format format = { channel_order, channel_type }; - cl_int res; - cl_mem mem = clCreateImage3D (this, flags, &format, width, height, - depth, len / (height * slice_pitch), slice_pitch, ptr, &res); - - if (res) - FAIL (CreateImage3D, res); - + NEED_SUCCESS_ARG (cl_mem mem, CreateImage3D, (this, flags, &format, width, height, depth, row_pitch, slice_pitch, ptr, &res)); XPUSH_NEW_OBJ ("OpenCL::Image3D", mem); -} void supported_image_formats (OpenCL::Context this, cl_mem_flags flags, cl_mem_object_type image_type) @@ -420,34 +374,19 @@ void sampler (OpenCL::Context this, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode) PPCODE: -{ - cl_int res; - cl_sampler sampler = clCreateSampler (this, normalized_coords, addressing_mode, filter_mode, &res); - - if (res) - FAIL (CreateSampler, res); - + NEED_SUCCESS_ARG (cl_sampler sampler, CreateSampler, (this, normalized_coords, addressing_mode, filter_mode, &res)); XPUSH_NEW_OBJ ("OpenCL::Sampler", sampler); -} void program_with_source (OpenCL::Context this, SV *program) PPCODE: -{ STRLEN len; size_t len2; const char *ptr = SvPVbyte (program, len); - cl_int res; - cl_program prog; len2 = len; - prog = clCreateProgramWithSource (this, 1, &ptr, &len2, &res); - - if (res) - FAIL (CreateProgramWithSource, res); - + NEED_SUCCESS_ARG (cl_program prog, CreateProgramWithSource, (this, 1, &ptr, &len2, &res)); XPUSH_NEW_OBJ ("OpenCL::Program", prog); -} MODULE = OpenCL PACKAGE = OpenCL::Queue @@ -464,7 +403,6 @@ void enqueue_read_buffer (OpenCL::Queue this, OpenCL::Buffer mem, cl_bool blocking, size_t offset, size_t len, SV *data, ...) PPCODE: -{ cl_event ev = 0; EVENT_LIST (6, items - 6); @@ -476,12 +414,10 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_write_buffer (OpenCL::Queue this, OpenCL::Buffer mem, cl_bool blocking, size_t offset, SV *data, ...) PPCODE: -{ cl_event ev = 0; STRLEN len; char *ptr = SvPVbyte (data, len); @@ -491,12 +427,10 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_copy_buffer (OpenCL::Queue this, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_offset, size_t dst_offset, size_t len, ...) PPCODE: -{ cl_event ev = 0; EVENT_LIST (6, items - 6); @@ -504,20 +438,22 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} - - /*TODO http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueReadBufferRect.html */ - /*TODO http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clEnqueueWriteBufferRect.html */ void enqueue_read_image (OpenCL::Queue this, OpenCL::Image src, cl_bool blocking, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) PPCODE: -{ cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; const size_t region[3] = { width, height, depth }; - size_t len = row_pitch * slice_pitch * depth; - EVENT_LIST (11, items - 11); + EVENT_LIST (12, items - 12); + + if (!row_pitch) + row_pitch = img_row_pitch (src); + + if (depth > 1 && !slice_pitch) + slice_pitch = row_pitch * height; + + size_t len = slice_pitch ? slice_pitch * depth : row_pitch * height; SvUPGRADE (data, SVt_PV); SvGROW (data, len); @@ -527,30 +463,36 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void -enqueue_write_image (OpenCL::Queue this, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, SV *data, ...) +enqueue_write_image (OpenCL::Queue this, OpenCL::Image dst, cl_bool blocking, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t row_pitch, size_t slice_pitch, SV *data, ...) PPCODE: -{ cl_event ev = 0; const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; const size_t region[3] = { width, height, depth }; STRLEN len; char *ptr = SvPVbyte (data, len); - size_t slice_pitch = len / (row_pitch * height); - EVENT_LIST (11, items - 11); + EVENT_LIST (12, items - 12); + + if (!row_pitch) + row_pitch = img_row_pitch (dst); + + if (depth > 1 && !slice_pitch) + slice_pitch = row_pitch * height; + + size_t min_len = slice_pitch ? slice_pitch * depth : row_pitch * height; + + if (len < min_len) + croak ("clEnqueueWriteImage: data string is shorter than what would be transferred"); NEED_SUCCESS (EnqueueWriteImage, (this, dst, blocking, dst_origin, region, row_pitch, slice_pitch, SvPVX (data), event_list_count, event_list_ptr, GIMME_V != G_VOID ? &ev : 0)); if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_copy_buffer_rect (OpenCL::Queue this, OpenCL::Buffer src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch, ...) PPCODE: -{ cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; @@ -561,12 +503,10 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_copy_buffer_to_image (OpenCL::Queue this, OpenCL::Buffer src, OpenCL::Image dst, size_t src_offset, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) PPCODE: -{ cl_event ev = 0; const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; const size_t region[3] = { width, height, depth }; @@ -576,12 +516,10 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void -enqueue_copy_image (OpenCL::Queue this, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) +enqueue_copy_image (OpenCL::Queue this, OpenCL::Image src, OpenCL::Image dst, size_t src_x, size_t src_y, size_t src_z, size_t dst_x, size_t dst_y, size_t dst_z, size_t width, size_t height, size_t depth, ...) PPCODE: -{ cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; const size_t dst_origin[3] = { dst_x, dst_y, dst_z }; @@ -592,12 +530,10 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_copy_image_to_buffer (OpenCL::Queue this, OpenCL::Image src, OpenCL::Buffer dst, size_t src_x, size_t src_y, size_t src_z, size_t width, size_t height, size_t depth, size_t dst_offset, ...) PPCODE: -{ cl_event ev = 0; const size_t src_origin[3] = { src_x, src_y, src_z }; const size_t region[3] = { width, height, depth }; @@ -607,12 +543,10 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_task (OpenCL::Queue this, OpenCL::Kernel kernel, ...) PPCODE: -{ cl_event ev = 0; EVENT_LIST (2, items - 2); @@ -620,12 +554,10 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_nd_range_kernel (OpenCL::Queue this, OpenCL::Kernel kernel, SV *global_work_offset, SV *global_work_size, SV *local_work_size = &PL_sv_undef, ...) PPCODE: -{ cl_event ev = 0; size_t *gwo = 0, *gws, *lws = 0; int gws_len; @@ -674,24 +606,19 @@ if (ev) XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_marker (OpenCL::Queue this) PPCODE: -{ cl_event ev; NEED_SUCCESS (EnqueueMarker, (this, &ev)); XPUSH_NEW_OBJ ("OpenCL::Event", ev); -} void enqueue_wait_for_events (OpenCL::Queue this, ...) CODE: -{ EVENT_LIST (1, items - 1); NEED_SUCCESS (EnqueueWaitForEvents, (this, event_list_count, event_list_ptr)); -} void enqueue_barrier (OpenCL::Queue this) @@ -752,31 +679,20 @@ void build_info (OpenCL::Program this, OpenCL::Device device, cl_program_build_info name) PPCODE: -{ size_t size; - SV *sv; - NEED_SUCCESS (GetProgramBuildInfo, (this, device, name, 0, 0, &size)); - sv = sv_2mortal (newSV (size)); + SV *sv = sv_2mortal (newSV (size)); SvUPGRADE (sv, SVt_PV); SvPOK_only (sv); SvCUR_set (sv, size); NEED_SUCCESS (GetProgramBuildInfo, (this, device, name, size, SvPVX (sv), 0)); XPUSHs (sv); -} void kernel (OpenCL::Program program, SV *function) PPCODE: -{ - cl_int res; - cl_kernel kernel = clCreateKernel (program, SvPVbyte_nolen (function), &res); - - if (res) - FAIL (CreateKernel, res); - + NEED_SUCCESS_ARG (cl_kernel kernel, CreateKernel, (program, SvPVbyte_nolen (function), &res)); XPUSH_NEW_OBJ ("OpenCL::Kernel", kernel); -} MODULE = OpenCL PACKAGE = OpenCL::Kernel