/*
 * OpenCL buffer iterator: walks one or more GeglBuffers (plus optional
 * auxiliary scratch textures) in batches of up to GEGL_CL_NTEX regions,
 * exposing each region as a cl_mem in the format the operation asked for.
 */

#include "config.h"

/* NOTE(review): the names of the six system headers below were missing from
 * the copy of the file this was reconstructed from; they were restored from
 * usage (memset, GLib types) - confirm against version control. */
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#include <glib-object.h>
#include <glib/gprintf.h>

#include "gegl.h"
#include "gegl/gegl-debug.h"
#include "gegl-buffer-types.h"
#include "gegl-buffer-cl-iterator.h"
#include "gegl-buffer-cl-cache.h"
#include "gegl-buffer-private.h"
#include "gegl-tile-storage.h"
#include "gegl-utils.h"

/* Logs the current value of the local `cl_err` and jumps to the local
 * `error:` label.  Wrapped in do/while (0) so that `if (x) CL_ERROR;`
 * behaves as a single statement, also in front of an `else`. */
#define CL_ERROR \
  do \
    { \
      GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error in %s:%d@%s - %s\n", \
                 __FILE__, __LINE__, __func__, gegl_cl_errstring (cl_err)); \
      goto error; \
    } \
  while (0)

/*
 * Private state behind the opaque GeglBufferClIterator handle.
 *
 * NOTE(review): the fields above "the following is private" presumably
 * mirror the public GeglBufferClIterator layout - do not reorder fields
 * without checking the header.
 */
typedef struct GeglBufferClIterators
{
  /* current region of interest */
  gint          n;                                                 /* number of valid textures in this batch */
  size_t        size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* length of current data in pixels */
  cl_mem        tex  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* texture handed to the caller */
  GeglRectangle roi  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];

  /* the following is private: */
  cl_mem        tex_buf [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* data in buffer->soft_format */
  cl_mem        tex_op  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* data in the requested format */

  /* don't free textures loaded from cache */
  gboolean      tex_buf_from_cache [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];

  gint          iterators;
  gint          iteration_no;
  gboolean      is_finished;

  guint         flags [GEGL_CL_BUFFER_MAX_ITERATORS];
  gint          area  [GEGL_CL_BUFFER_MAX_ITERATORS][4]; /* left, right, top, bottom padding */

  GeglRectangle rect  [GEGL_CL_BUFFER_MAX_ITERATORS]; /* the region we iterate on.  They can be
                                                         different from each other, but width
                                                         and height are the same */
  const Babl   *format [GEGL_CL_BUFFER_MAX_ITERATORS]; /* The format required for the data */
  GeglBuffer   *buffer [GEGL_CL_BUFFER_MAX_ITERATORS];

  /* buffer->soft_format */
  size_t        buf_cl_format_size [GEGL_CL_BUFFER_MAX_ITERATORS];
  /* format */
  size_t        op_cl_format_size  [GEGL_CL_BUFFER_MAX_ITERATORS];

  gegl_cl_color_op conv [GEGL_CL_BUFFER_MAX_ITERATORS];

  /* total iteration */
  gint           rois;    /* number of entries in roi_all */
  gint           roi_no;  /* index of the next entry of roi_all to process */
  GeglRectangle *roi_all; /* sub-rectangles, relative to the origin of rect[] */
} GeglBufferClIterators;

/* TRUE when the buffer of iterator `no` is also used by an iterator with a
 * lower index, so that each distinct buffer is locked/unlocked only once. */
static gboolean
cl_iterator_buffer_seen_before (const GeglBufferClIterators *i,
                                gint                         no)
{
  gint k;

  for (k = 0; k < no; k++)
    if (i->buffer[no] == i->buffer[k])
      return TRUE;

  return FALSE;
}

/* TRUE when iterator `no` was registered with padding on any side. */
static gboolean
cl_iterator_has_padding (const GeglBufferClIterators *i,
                         gint                         no)
{
  return i->area[no][0] > 0 || i->area[no][1] > 0 ||
         i->area[no][2] > 0 || i->area[no][3] > 0;
}

/* Releases every texture of the current batch and resets the per-slot state.
 * Cache-owned textures are handed back to the cache instead of being
 * released directly.  With `strict` set, a failed cache release trips an
 * assertion (normal path); without it the result is ignored (error path). */
static void
cl_iterator_release_textures (GeglBufferClIterators *i,
                              gboolean               strict)
{
  gint no, j;

  for (no = 0; no < i->iterators; no++)
    for (j = 0; j < i->n; j++)
      {
        if (i->tex_buf_from_cache[no][j])
          {
            gboolean ok = gegl_buffer_cl_cache_release (i->tex_buf[no][j]);

            if (strict)
              g_assert (ok);
            (void) ok;
          }
        else if (i->tex_buf[no][j])
          {
            gegl_clReleaseMemObject (i->tex_buf[no][j]);
          }

        if (i->tex_op[no][j])
          gegl_clReleaseMemObject (i->tex_op[no][j]);

        i->tex    [no][j] = NULL;
        i->tex_buf[no][j] = NULL;
        i->tex_op [no][j] = NULL;

        /* The flag must not outlive the texture it describes: otherwise a
         * later cache miss in the same slot would be treated as a cache
         * hit and the freshly created texture would never be released. */
        i->tex_buf_from_cache[no][j] = FALSE;
      }
}

/* CPU -> GPU: maps `tex` for writing, fills it with `roi` of `buffer`
 * fetched in `format`, and unmaps it.  Returns the first CL error. */
static cl_int
cl_iterator_upload (GeglBuffer          *buffer,
                    const GeglRectangle *roi,
                    const Babl          *format,
                    cl_mem               tex,
                    size_t               n_bytes)
{
  cl_int   cl_err = CL_SUCCESS;
  gpointer data;

  /* pre-pinned memory */
  data = gegl_clEnqueueMapBuffer (gegl_cl_get_command_queue (), tex, CL_TRUE,
                                  CL_MAP_WRITE,
                                  0, n_bytes,
                                  0, NULL, NULL, &cl_err);
  if (cl_err != CL_SUCCESS)
    return cl_err;

  gegl_buffer_get (buffer, roi, 1.0, format, data,
                   GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);

  return gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue (), tex, data,
                                       0, NULL, NULL);
}

/* GPU -> CPU: maps `tex` for reading, stores its contents (interpreted as
 * `format`) into `roi` of `buffer`, and unmaps it.  Returns the first CL
 * error. */
static cl_int
cl_iterator_download (GeglBuffer          *buffer,
                      const GeglRectangle *roi,
                      const Babl          *format,
                      cl_mem               tex,
                      size_t               n_bytes)
{
  cl_int   cl_err = CL_SUCCESS;
  gpointer data;

  data = gegl_clEnqueueMapBuffer (gegl_cl_get_command_queue (), tex, CL_TRUE,
                                  CL_MAP_READ,
                                  0, n_bytes,
                                  0, NULL, NULL, &cl_err);
  if (cl_err != CL_SUCCESS)
    return cl_err;

  gegl_buffer_set (buffer, roi, 0, format, data, GEGL_AUTO_ROWSTRIDE);

  return gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue (), tex, data,
                                       0, NULL, NULL);
}

/* Fills tex_buf[no][j] with the region's pixels in buffer->soft_format:
 * taken from the CL cache when available, otherwise a new texture is
 * created and loaded from the buffer.  Returns the first CL error. */
static cl_int
cl_iterator_acquire_tex_buf (GeglBufferClIterators *i,
                             gint                   no,
                             gint                   j)
{
  cl_int       cl_err  = CL_SUCCESS;
  const size_t n_bytes = i->size[no][j] * i->buf_cl_format_size[no];

  i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]);

  if (i->tex_buf[no][j])
    {
      /* don't free texture from cache */
      i->tex_buf_from_cache[no][j] = TRUE;
      return CL_SUCCESS;
    }

  i->tex_buf_from_cache[no][j] = FALSE;

  gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]);

  i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                           n_bytes,
                                           NULL, &cl_err);
  if (cl_err != CL_SUCCESS)
    return cl_err;

  /* color conversion will be performed in the GPU later */
  return cl_iterator_upload (i->buffer[no], &i->roi[no][j],
                             i->buffer[no]->soft_format,
                             i->tex_buf[no][j], n_bytes);
}

/*
 * Registers one more buffer (or auxiliary texture) with the iterator.
 *
 * iterator:     iterator obtained from gegl_buffer_cl_iterator_new ().
 * buffer:       buffer to read or write; must be NULL for any `flags` value
 *               other than GEGL_CL_BUFFER_READ / GEGL_CL_BUFFER_WRITE.
 * result:       region to iterate over; NULL selects the buffer extent for
 *               the first iterator and the first iterator's rect otherwise.
 * format:       format the operation wants; NULL falls back to
 *               buffer->soft_format for READ/WRITE iterators.
 * flags:        GEGL_CL_BUFFER_READ, GEGL_CL_BUFFER_WRITE or
 *               GEGL_CL_BUFFER_AUX.
 * left/right/top/bottom: extra pixels fetched around every region; must all
 *               be <= 0 for WRITE iterators (asserted).
 * abyss_policy: accepted but not used by this function.
 *
 * Returns the index of the new iterator.  Aborts through g_error () when
 * GEGL_CL_BUFFER_MAX_ITERATORS would be exceeded.
 */
gint
gegl_buffer_cl_iterator_add_2 (GeglBufferClIterator *iterator,
                               GeglBuffer           *buffer,
                               const GeglRectangle  *result,
                               const Babl           *format,
                               guint                 flags,
                               gint                  left,
                               gint                  right,
                               gint                  top,
                               gint                  bottom,
                               GeglAbyssPolicy       abyss_policy)
{
  GeglBufferClIterators *i = (gpointer)iterator;
  gint self = 0;

  if (i->iterators+1 > GEGL_CL_BUFFER_MAX_ITERATORS)
    {
      g_error ("too many iterators (%i)", i->iterators+1);
    }

  if (i->iterators == 0) /* for sanity, we zero at init */
    {
      memset (i, 0, sizeof (GeglBufferClIterators));
    }

  self = i->iterators++;

  if (!result)
    result = (self == 0) ? &(buffer->extent) : &(i->rect[0]);

  i->rect[self]  = *result;
  i->flags[self] = flags;

  if (flags == GEGL_CL_BUFFER_WRITE || flags == GEGL_CL_BUFFER_READ)
    {
      const Babl *op_format;

      g_assert (buffer);

      i->buffer[self] = g_object_ref (buffer);

      /* Resolve the NULL fallback once and use the resolved format for
       * every query below, so a NULL `format` is never passed on. */
      op_format       = format ? format : buffer->soft_format;
      i->format[self] = op_format;

      if (flags == GEGL_CL_BUFFER_WRITE)
        i->conv[self] = gegl_cl_color_supported (op_format, buffer->soft_format);
      else
        i->conv[self] = gegl_cl_color_supported (buffer->soft_format, op_format);

      gegl_cl_color_babl (buffer->soft_format, &i->buf_cl_format_size[self]);
      gegl_cl_color_babl (op_format,           &i->op_cl_format_size [self]);
    }
  else /* GEGL_CL_BUFFER_AUX */
    {
      g_assert (buffer == NULL);

      i->buffer[self] = NULL;
      i->format[self] = NULL;
      i->conv[self]   = -1;
      i->buf_cl_format_size[self] = SIZE_MAX;

      gegl_cl_color_babl (format, &i->op_cl_format_size [self]);
    }

  i->area[self][0] = left;
  i->area[self][1] = right;
  i->area[self][2] = top;
  i->area[self][3] = bottom;

  /* padding makes no sense on an output */
  if (flags == GEGL_CL_BUFFER_WRITE
      && (left > 0 || right > 0 || top > 0 || bottom > 0))
    {
      g_assert (FALSE);
    }

  if (self != 0)
    {
      /* we make all subsequently added iterators share the width and height
       * of the first one */
      i->rect[self].width  = i->rect[0].width;
      i->rect[self].height = i->rect[0].height;
    }
  else
    {
      /* The first iterator defines the tiling: split `result` into a grid
       * of regions of at most iter_width x iter_height pixels.  The step
       * sizes are read once so the count and fill loops always agree. */
      const gint iter_width  = gegl_cl_get_iter_width ();
      const gint iter_height = gegl_cl_get_iter_height ();
      gint x, y, j;

      i->rois = 0;
      for (y = 0; y < result->height; y += iter_height)
        for (x = 0; x < result->width; x += iter_width)
          i->rois++;

      i->roi_no  = 0;
      i->roi_all = g_new0 (GeglRectangle, i->rois);

      j = 0;
      for (y = 0; y < result->height; y += iter_height)
        for (x = 0; x < result->width; x += iter_width)
          {
            GeglRectangle r = {x, y,
                               MIN (iter_width,  result->width  - x),
                               MIN (iter_height, result->height - y)};
            i->roi_all[j] = r;
            j++;
          }
    }

  return self;
}

/*
 * Same as gegl_buffer_cl_iterator_add_2 () with no padding on any side.
 */
gint
gegl_buffer_cl_iterator_add (GeglBufferClIterator *iterator,
                             GeglBuffer           *buffer,
                             const GeglRectangle  *result,
                             const Babl           *format,
                             guint                 flags,
                             GeglAbyssPolicy       abyss_policy)
{
  return gegl_buffer_cl_iterator_add_2 (iterator, buffer, result, format, flags,
                                        0, 0, 0, 0, abyss_policy);
}

#define OPENCL_USE_CACHE 1

/*
 * Advances the iterator to the next batch of regions.
 *
 * On the first call the distinct buffers are locked.  On every later call
 * the results of the previous batch are first written back (WRITE
 * iterators) and the previous textures released.  Then textures for up to
 * GEGL_CL_NTEX regions are prepared for every registered iterator and
 * published through tex[][], roi[][] and size[][]; `n` holds the number of
 * valid entries.
 *
 * Returns TRUE while a batch is available.  When it returns FALSE with
 * *err == FALSE the iteration is complete: buffers have been unlocked and
 * unreferenced and the iterator itself has been freed, so it must not be
 * used again.  When an OpenCL call fails, the textures of the current batch
 * are released, *err is set to TRUE and FALSE is returned.
 *
 * NOTE(review): on the error path the buffers stay locked and referenced
 * and the iterator is not freed - nothing in this file offers a way to
 * clean that up; confirm how callers are expected to recover.
 */
gboolean
gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator,
                              gboolean             *err)
{
  GeglBufferClIterators *i = (gpointer)iterator;
  gboolean result = FALSE;
  gint no, j;
  cl_int cl_err = 0;

  if (i->is_finished)
    g_error ("%s called on finished buffer iterator", G_STRFUNC);

  if (i->iteration_no == 0)
    {
      for (no = 0; no < i->iterators; no++)
        {
          if (!i->buffer[no])
            continue;

          if (!cl_iterator_buffer_seen_before (i, no))
            gegl_buffer_lock (i->buffer[no]);

          /* outputs and padded inputs flush the CL cache for the whole
           * iterated rectangle up front */
          if (i->flags[no] == GEGL_CL_BUFFER_WRITE
              || (i->flags[no] == GEGL_CL_BUFFER_READ
                  && cl_iterator_has_padding (i, no)))
            {
              gegl_buffer_cl_cache_flush (i->buffer[no], &i->rect[no]);
            }
        }
    }
  else
    {
      /* complete pending write work */
      for (no = 0; no < i->iterators; no++)
        {
          if (i->flags[no] != GEGL_CL_BUFFER_WRITE)
            continue;

          /* Wait Processing */
          cl_err = gegl_clEnqueueBarrier (gegl_cl_get_command_queue ());
          if (cl_err != CL_SUCCESS) CL_ERROR;

          /* color conversion in the GPU (output) */
          if (i->conv[no] == GEGL_CL_COLOR_CONVERT)
            for (j = 0; j < i->n; j++)
              {
                /* the result is compared against FALSE, not against a CL
                 * status code */
                cl_err = gegl_cl_color_conv (i->tex_op[no][j], i->tex_buf[no][j],
                                             i->size[no][j],
                                             i->format[no],
                                             i->buffer[no]->soft_format);
                if (cl_err == FALSE) CL_ERROR;
              }

          /* Wait Processing */
          cl_err = gegl_clEnqueueBarrier (gegl_cl_get_command_queue ());
          if (cl_err != CL_SUCCESS) CL_ERROR;

          /* GPU -> CPU */
          for (j = 0; j < i->n; j++)
            {
              /* tile-ize */
              if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED)
                {
                  /* color conversion using BABL */
                  cl_err = cl_iterator_download (i->buffer[no], &i->roi[no][j],
                                                 i->format[no],
                                                 i->tex_op[no][j],
                                                 i->size[no][j] * i->op_cl_format_size[no]);
                  if (cl_err != CL_SUCCESS) CL_ERROR;
                }
              else
#ifdef OPENCL_USE_CACHE
                {
                  gegl_buffer_cl_cache_new (i->buffer[no], &i->roi[no][j],
                                            i->tex_buf[no][j]);
                  /* don't release this texture */
                  i->tex_buf[no][j] = NULL;
                }
#else
                {
                  /* tex_buf holds pixels in buffer->soft_format, so that
                   * is the format handed to the buffer */
                  cl_err = cl_iterator_download (i->buffer[no], &i->roi[no][j],
                                                 i->buffer[no]->soft_format,
                                                 i->tex_buf[no][j],
                                                 i->size[no][j] * i->buf_cl_format_size[no]);
                  if (cl_err != CL_SUCCESS) CL_ERROR;
                }
#endif
            }
        }

      /* Run! */
      cl_err = gegl_clFinish (gegl_cl_get_command_queue ());
      if (cl_err != CL_SUCCESS) CL_ERROR;

      cl_iterator_release_textures (i, TRUE);
    }

  g_assert (i->iterators > 0);
  result = (i->roi_no >= i->rois) ? FALSE : TRUE;

  i->n = MIN (GEGL_CL_NTEX, i->rois - i->roi_no);

  /* then we iterate all */
  for (no = 0; no < i->iterators; no++)
    {
      /* translate the batch's sub-rectangles into this iterator's
       * coordinates and grow them by its padding */
      for (j = 0; j < i->n; j++)
        {
          const GeglRectangle *sub = &i->roi_all[i->roi_no + j];
          GeglRectangle r = {i->rect[no].x + sub->x - i->area[no][0],
                             i->rect[no].y + sub->y - i->area[no][2],
                             sub->width  + i->area[no][0] + i->area[no][1],
                             sub->height + i->area[no][2] + i->area[no][3]};
          i->roi [no][j] = r;
          i->size[no][j] = r.width * r.height;
        }

      if (i->flags[no] == GEGL_CL_BUFFER_READ)
        {
          for (j = 0; j < i->n; j++)
            {
              /* un-tile */
              switch (i->conv[no])
                {
                  case GEGL_CL_COLOR_NOT_SUPPORTED:
                    {
                      const size_t op_bytes = i->size[no][j] * i->op_cl_format_size[no];

                      gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]);

                      g_assert (i->tex_op[no][j] == NULL);
                      i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                              CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                                              op_bytes,
                                                              NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* color conversion using BABL */
                      cl_err = cl_iterator_upload (i->buffer[no], &i->roi[no][j],
                                                   i->format[no],
                                                   i->tex_op[no][j], op_bytes);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      i->tex[no][j] = i->tex_op[no][j];
                      break;
                    }

                  case GEGL_CL_COLOR_EQUAL:
                    {
                      cl_err = cl_iterator_acquire_tex_buf (i, no, j);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      i->tex[no][j] = i->tex_buf[no][j];
                      break;
                    }

                  case GEGL_CL_COLOR_CONVERT:
                    {
                      cl_err = cl_iterator_acquire_tex_buf (i, no, j);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      g_assert (i->tex_op[no][j] == NULL);
                      i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                              CL_MEM_READ_WRITE,
                                                              i->size[no][j] * i->op_cl_format_size[no],
                                                              NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* color conversion in the GPU (input) */
                      g_assert (i->tex_buf[no][j] && i->tex_op[no][j]);
                      cl_err = gegl_cl_color_conv (i->tex_buf[no][j], i->tex_op[no][j],
                                                   i->size[no][j],
                                                   i->buffer[no]->soft_format,
                                                   i->format[no]);
                      if (cl_err == FALSE) CL_ERROR;

                      i->tex[no][j] = i->tex_op[no][j];
                      break;
                    }

                  default:
                    break;
                }
            }

          /* Wait Processing */
          cl_err = gegl_clEnqueueBarrier (gegl_cl_get_command_queue ());
          if (cl_err != CL_SUCCESS) CL_ERROR;
        }
      else if (i->flags[no] == GEGL_CL_BUFFER_WRITE)
        {
          for (j = 0; j < i->n; j++)
            {
              switch (i->conv[no])
                {
                  case GEGL_CL_COLOR_NOT_SUPPORTED:
                    {
                      g_assert (i->tex_op[no][j] == NULL);
                      i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                              CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
                                                              i->size[no][j] * i->op_cl_format_size[no],
                                                              NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      i->tex[no][j] = i->tex_op[no][j];
                      break;
                    }

                  case GEGL_CL_COLOR_EQUAL:
                    {
                      g_assert (i->tex_buf[no][j] == NULL);
                      i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                               CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */
                                                               i->size[no][j] * i->buf_cl_format_size[no],
                                                               NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      i->tex[no][j] = i->tex_buf[no][j];
                      break;
                    }

                  case GEGL_CL_COLOR_CONVERT:
                    {
                      g_assert (i->tex_buf[no][j] == NULL);
                      i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                               CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */
                                                               i->size[no][j] * i->buf_cl_format_size[no],
                                                               NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      g_assert (i->tex_op[no][j] == NULL);
                      i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                              CL_MEM_READ_WRITE,
                                                              i->size[no][j] * i->op_cl_format_size[no],
                                                              NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      i->tex[no][j] = i->tex_op[no][j];
                      break;
                    }

                  default:
                    break;
                }
            }
        }
      else if (i->flags[no] == GEGL_CL_BUFFER_AUX)
        {
          for (j = 0; j < i->n; j++)
            {
              g_assert (i->tex_op[no][j] == NULL);
              i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                      CL_MEM_READ_WRITE,
                                                      i->size[no][j] * i->op_cl_format_size[no],
                                                      NULL, &cl_err);
              if (cl_err != CL_SUCCESS) CL_ERROR;

              i->tex[no][j] = i->tex_op[no][j];
            }
        }
    }

  i->roi_no += i->n;

  i->iteration_no++;

  if (result == FALSE)
    {
      /* done: undo the locks and references taken for each buffer, then
       * dispose of the iterator itself */
      for (no = 0; no < i->iterators; no++)
        {
          if (!i->buffer[no])
            continue;

          if (!cl_iterator_buffer_seen_before (i, no))
            gegl_buffer_unlock (i->buffer[no]);

          g_object_unref (i->buffer[no]);
        }

      i->is_finished = TRUE;

      g_free (i->roi_all);
      g_slice_free (GeglBufferClIterators, i);
    }

  *err = FALSE;
  return result;

error:

  /* Cache-owned textures go back to the cache here as well, rather than
   * being released behind its back. */
  cl_iterator_release_textures (i, FALSE);

  *err = TRUE;
  return FALSE;
}

/*
 * Allocates a zeroed iterator and registers `buffer` as its first member
 * through gegl_buffer_cl_iterator_add ().  The iterator is freed by
 * gegl_buffer_cl_iterator_next () when the iteration completes.
 */
GeglBufferClIterator *
gegl_buffer_cl_iterator_new (GeglBuffer          *buffer,
                             const GeglRectangle *roi,
                             const Babl          *format,
                             guint                flags,
                             GeglAbyssPolicy      abyss_policy)
{
  GeglBufferClIterator *i = (gpointer)g_slice_new0 (GeglBufferClIterators);

  /* Because the iterator is nulled above, we can forgo explicitly setting
   * i->is_finished to FALSE. */
  gegl_buffer_cl_iterator_add (i, buffer, roi, format, flags, abyss_policy);

  return i;
}

#undef CL_ERROR