/* OpenCL-backed GeglBuffer iterator: walks a region in fixed-size chunks that are exposed as cl_mem objects. */
#include "config.h"
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include <glib-object.h>
#include <glib/gprintf.h>

#include "gegl.h"
#include "gegl/gegl-debug.h"

#include "gegl-buffer-types.h"
#include "gegl-buffer-cl-iterator.h"
#include "gegl-buffer-cl-cache.h"
#include "gegl-buffer-private.h"
#include "gegl-tile-storage.h"
#include "gegl-utils.h"

/* Logs the current OpenCL error (file, line, function and the string for
 * `cl_err`) and jumps to the `error` label.  The enclosing function must
 * therefore declare a variable named `cl_err` and provide an `error:` label.
 * Intended to be used as `if (cond) CL_ERROR;`. */
#define CL_ERROR {GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error in %s:%d@%s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(cl_err)); goto error;}

/* Private state behind the opaque GeglBufferClIterator handle.  All per-buffer
 * arrays are indexed first by the iterator slot returned from
 * gegl_buffer_cl_iterator_add_2() and, where present, second by the texture
 * index within the current batch (0 .. n-1). */
typedef struct GeglBufferClIterators
{
  /* current region of interest */
  gint          n;                                                  /* number of textures in the current batch */
  size_t        size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];  /* length of current data in pixels */
  cl_mem        tex  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];  /* texture handed to the caller; aliases tex_buf or tex_op */
  GeglRectangle roi  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];  /* rectangle of each texture, border area included */

  /* the following is private: */
  cl_mem        tex_buf [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* data in the buffer's soft_format */
  cl_mem        tex_op  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* data in the format requested by the caller */

  /* don't free textures loaded from cache */
  gboolean       tex_buf_from_cache [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];

  gint           iterators;     /* number of slots registered so far */
  gint           iteration_no;  /* number of successful calls to ..._next() */
  gboolean       is_finished;   /* set once the iteration has been torn down */

  guint          flags          [GEGL_CL_BUFFER_MAX_ITERATORS]; /* GEGL_CL_BUFFER_READ / _WRITE / _AUX */
  gint           area           [GEGL_CL_BUFFER_MAX_ITERATORS][4]; /* border: left, right, top, bottom */

  GeglRectangle  rect           [GEGL_CL_BUFFER_MAX_ITERATORS]; /* the region we iterate on. They can be
                                                                   different from each other, but width
                                                                   and height are the same */

  const Babl    *format         [GEGL_CL_BUFFER_MAX_ITERATORS]; /* The format required for the data */
  GeglBuffer    *buffer         [GEGL_CL_BUFFER_MAX_ITERATORS]; /* referenced buffer, NULL for AUX slots */

  /* buffer->soft_format */
  size_t buf_cl_format_size     [GEGL_CL_BUFFER_MAX_ITERATORS];
  /* format */
  size_t op_cl_format_size      [GEGL_CL_BUFFER_MAX_ITERATORS];

  /* result of gegl_cl_color_supported() for the slot; -1 for AUX slots */
  gegl_cl_color_op conv         [GEGL_CL_BUFFER_MAX_ITERATORS];

  /* total iteration */
  gint           rois;     /* number of chunks the first slot's region is split into */
  gint           roi_no;   /* index of the next chunk to be processed */
  GeglRectangle *roi_all;  /* the chunks, relative to the origin of each slot's rect */

} GeglBufferClIterators;

/**
 * gegl_buffer_cl_iterator_add_2:
 * @iterator: the iterator to extend.
 * @buffer: the buffer to iterate over; must be NULL for GEGL_CL_BUFFER_AUX
 *          and non-NULL for GEGL_CL_BUFFER_READ / GEGL_CL_BUFFER_WRITE.
 * @result: region to iterate over.  When NULL, the first slot uses the
 *          extent of @buffer and later slots reuse the first slot's region.
 * @format: pixel format the caller wants to see.  When NULL for a READ or
 *          WRITE slot, the buffer's soft_format is used.
 * @flags: GEGL_CL_BUFFER_READ, GEGL_CL_BUFFER_WRITE or GEGL_CL_BUFFER_AUX.
 * @left: extra columns fetched to the left of every chunk.
 * @right: extra columns fetched to the right of every chunk.
 * @top: extra rows fetched above every chunk.
 * @bottom: extra rows fetched below every chunk.
 * @abyss_policy: accepted for API symmetry; not used by this function.
 *
 * Registers one more buffer (or auxiliary texture) with @iterator.  The first
 * registration also splits the region into chunks of at most
 * gegl_cl_get_iter_width() x gegl_cl_get_iter_height() pixels.  Later
 * registrations inherit the width and height of the first region.
 *
 * A border (@left/@right/@top/@bottom > 0) is not allowed on WRITE slots.
 *
 * Returns: the index of the newly added slot.
 */
gint
gegl_buffer_cl_iterator_add_2 (GeglBufferClIterator  *iterator,
                               GeglBuffer            *buffer,
                               const GeglRectangle   *result,
                               const Babl            *format,
                               guint                  flags,
                               gint                   left,
                               gint                   right,
                               gint                   top,
                               gint                   bottom,
                               GeglAbyssPolicy        abyss_policy)
{
  GeglBufferClIterators *i = (gpointer)iterator;
  gint self = 0;

  if (i->iterators+1 > GEGL_CL_BUFFER_MAX_ITERATORS)
    {
      g_error ("too many iterators (%i)", i->iterators+1);
    }

  if (i->iterators == 0) /* for sanity, we zero at init */
    {
      memset (i, 0, sizeof (GeglBufferClIterators));
    }

  self = i->iterators++;

  if (!result)
    {
      if (self == 0)
        {
          /* the default region comes from the buffer, so there must be one */
          g_assert (buffer);
          result = &(buffer->extent);
        }
      else
        {
          result = &(i->rect[0]);
        }
    }
  i->rect[self] = *result;

  i->flags[self] = flags;

  if (flags == GEGL_CL_BUFFER_WRITE || flags == GEGL_CL_BUFFER_READ)
    {
      g_assert (buffer);

      i->buffer[self] = g_object_ref (buffer);

      /* resolve the effective format once and use it everywhere below, so a
       * NULL @format never reaches the color helpers */
      i->format[self] = format ? format : buffer->soft_format;

      if (flags == GEGL_CL_BUFFER_WRITE)
        i->conv[self] = gegl_cl_color_supported (i->format[self], buffer->soft_format);
      else
        i->conv[self] = gegl_cl_color_supported (buffer->soft_format, i->format[self]);

      gegl_cl_color_babl (buffer->soft_format, &i->buf_cl_format_size[self]);
      gegl_cl_color_babl (i->format[self],     &i->op_cl_format_size [self]);
    }
  else /* GEGL_CL_BUFFER_AUX */
    {
      g_assert (buffer == NULL);

      i->buffer[self] = NULL;
      i->format[self] = NULL;
      i->conv[self]   = -1;
      i->buf_cl_format_size[self] = SIZE_MAX;

      gegl_cl_color_babl (format, &i->op_cl_format_size [self]);
    }

  i->area[self][0] = left;
  i->area[self][1] = right;
  i->area[self][2] = top;
  i->area[self][3] = bottom;

  /* output regions cannot carry a border */
  g_assert (!(flags == GEGL_CL_BUFFER_WRITE
              && (left > 0 || right > 0 || top > 0 || bottom > 0)));

  if (self!=0)
    {
      /* we make all subsequently added iterators share the width and height of the first one */
      i->rect[self].width  = i->rect[0].width;
      i->rect[self].height = i->rect[0].height;
    }
  else
    {
      gint x, y, j;

      /* first pass: count the chunks so the array can be sized */
      i->rois = 0;
      for (y=0; y < result->height; y += gegl_cl_get_iter_height ())
        for (x=0; x < result->width;  x += gegl_cl_get_iter_width ())
          i->rois++;

      i->roi_no = 0;
      i->roi_all = g_new0 (GeglRectangle, i->rois);

      /* second pass: record each chunk relative to the region origin,
       * clipping the last column/row of chunks to the region size */
      j = 0;
      for (y=0; y < result->height; y += gegl_cl_get_iter_height ())
        for (x=0; x < result->width;  x += gegl_cl_get_iter_width ())
          {
            GeglRectangle r = {x, y,
                               MIN(gegl_cl_get_iter_width (),  result->width  - x),
                               MIN(gegl_cl_get_iter_height (), result->height - y)};
            i->roi_all[j] = r;
            j++;
          }
    }

  return self;
}

/* Registers @buffer with @iterator without any border around the chunks.
 * Forwards to gegl_buffer_cl_iterator_add_2() with all four border sizes set
 * to zero and returns the slot index it reports. */
gint
gegl_buffer_cl_iterator_add (GeglBufferClIterator  *iterator,
                             GeglBuffer            *buffer,
                             const GeglRectangle   *result,
                             const Babl            *format,
                             guint                  flags,
                             GeglAbyssPolicy        abyss_policy)
{
  const gint no_border = 0;
  gint       slot;

  slot = gegl_buffer_cl_iterator_add_2 (iterator, buffer, result, format, flags,
                                        no_border, no_border, no_border, no_border,
                                        abyss_policy);

  return slot;
}

/* When defined, textures produced by WRITE slots whose color conversion is
 * handled on the GPU are handed to the OpenCL buffer cache instead of being
 * mapped and copied back into the GeglBuffer right away. */
#define OPENCL_USE_CACHE 1

/**
 * gegl_buffer_cl_iterator_next:
 * @iterator: the iterator to advance.
 * @err: set to TRUE when an OpenCL call failed, FALSE otherwise.
 *
 * Advances to the next batch of up to GEGL_CL_NTEX chunks.
 *
 * On the first call every distinct buffer is locked, and the OpenCL cache is
 * flushed for WRITE slots and for READ slots that have a border.  On later
 * calls the previous batch is completed first: WRITE slots are converted and
 * either stored through gegl_buffer_set() or handed to the cache, the command
 * queue is finished, and all textures of the previous batch are released.
 *
 * Afterwards textures for the new batch are created (and, for READ slots,
 * filled), and exposed through the `tex`, `roi`, `size` and `n` fields.
 *
 * When no chunks are left the buffers are unlocked and unreferenced and the
 * iterator itself is freed; it must not be used after that.
 *
 * Returns: TRUE while a batch is available.  FALSE when the iteration is
 * over or an error occurred; check @err to tell the two apart.
 */
gboolean
gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
  GeglBufferClIterators *i = (gpointer)iterator;
  gboolean result = FALSE;
  gint no, j;
  cl_int cl_err = 0;

  if (i->is_finished)
    g_error ("%s called on finished buffer iterator", G_STRFUNC);
  if (i->iteration_no == 0)
    {
      for (no=0; no<i->iterators;no++)
        {
          if (i->buffer[no])
            {
              gint prev;
              gboolean found = FALSE;

              /* lock each buffer only once, even if it is registered in
               * several slots */
              for (prev=0; prev<no; prev++)
                if (i->buffer[no]==i->buffer[prev])
                  {
                    found = TRUE;
                    break;
                  }
              if (!found)
                gegl_buffer_lock (i->buffer[no]);

              if (i->flags[no] == GEGL_CL_BUFFER_WRITE
                  || (i->flags[no] == GEGL_CL_BUFFER_READ
                      && (i->area[no][0] > 0 || i->area[no][1] > 0 || i->area[no][2] > 0 || i->area[no][3] > 0)))
                {
                  gegl_buffer_cl_cache_flush (i->buffer[no], &i->rect[no]);
                }
            }
        }
    }
  else
    {
      /* complete pending write work */
      for (no=0; no<i->iterators;no++)
        {
          if (i->flags[no] == GEGL_CL_BUFFER_WRITE)
            {
              /* Wait Processing */
              cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
              if (cl_err != CL_SUCCESS) CL_ERROR;

              /* color conversion in the GPU (output) */
              if (i->conv[no] == GEGL_CL_COLOR_CONVERT)
                for (j=0; j < i->n; j++)
                  {
                    cl_err = gegl_cl_color_conv (i->tex_op[no][j], i->tex_buf[no][j], i->size[no][j],
                                                 i->format[no], i->buffer[no]->soft_format);
                    if (cl_err == FALSE) CL_ERROR;
                  }

              /* Wait Processing */
              cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
              if (cl_err != CL_SUCCESS) CL_ERROR;

              /* GPU -> CPU */
              for (j=0; j < i->n; j++)
                {
                  gpointer data;

                  /* tile-ize */
                  if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED)
                    {
                      data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
                                                     CL_MAP_READ,
                                                     0, i->size[no][j] * i->op_cl_format_size [no],
                                                     0, NULL, NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* color conversion using BABL */
                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], 0, i->format[no], data, GEGL_AUTO_ROWSTRIDE);

                      cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data,
                                                             0, NULL, NULL);
                      if (cl_err != CL_SUCCESS) CL_ERROR;
                    }
                  else
#ifdef OPENCL_USE_CACHE
                    {
                      gegl_buffer_cl_cache_new (i->buffer[no], &i->roi[no][j], i->tex_buf[no][j]);
                      /* don't release this texture */
                      i->tex_buf[no][j] = NULL;
                    }
#else
                    {
                      data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
                                                     CL_MAP_READ,
                                                     0, i->size[no][j] * i->buf_cl_format_size [no],
                                                     0, NULL, NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* tex_buf already holds the buffer's soft_format (it is
                       * sized with buf_cl_format_size), so store it as such */
                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], 0, i->buffer[no]->soft_format, data, GEGL_AUTO_ROWSTRIDE);

                      cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
                                                             0, NULL, NULL);
                      if (cl_err != CL_SUCCESS) CL_ERROR;
                    }
#endif
                }
            }
        }

      /* Run! */
      cl_err = gegl_clFinish(gegl_cl_get_command_queue());
      if (cl_err != CL_SUCCESS) CL_ERROR;

      /* release the textures of the batch that has just been completed */
      for (no=0; no < i->iterators; no++)
        for (j=0; j < i->n; j++)
          {
            if (i->tex_buf_from_cache [no][j])
              {
                /* borrowed from the cache: give it back instead of freeing */
                gboolean ok = gegl_buffer_cl_cache_release (i->tex_buf[no][j]);
                g_assert (ok);
              }
            else if (i->tex_buf[no][j])
              {
                gegl_clReleaseMemObject (i->tex_buf[no][j]);
              }

            if (i->tex_op [no][j])
              gegl_clReleaseMemObject (i->tex_op [no][j]);

            i->tex    [no][j] = NULL;
            i->tex_buf[no][j] = NULL;
            i->tex_op [no][j] = NULL;

            /* the flag describes the texture that was just dropped; clear it
             * so it cannot leak into the next batch, where the same slot may
             * hold a freshly created (non-cache) texture */
            i->tex_buf_from_cache [no][j] = FALSE;
          }
    }

  g_assert (i->iterators > 0);
  result = (i->roi_no >= i->rois)? FALSE : TRUE;

  i->n = MIN(GEGL_CL_NTEX, i->rois - i->roi_no);

  /* then we iterate all */
  for (no=0; no<i->iterators;no++)
    {
      /* position every chunk of the batch inside this slot's region and
       * grow it by the requested border */
      for (j = 0; j < i->n; j++)
        {
          GeglRectangle r = {i->rect[no].x + i->roi_all[i->roi_no+j].x - i->area[no][0],
                             i->rect[no].y + i->roi_all[i->roi_no+j].y - i->area[no][2],
                             i->roi_all[i->roi_no+j].width             + i->area[no][0] + i->area[no][1],
                             i->roi_all[i->roi_no+j].height            + i->area[no][2] + i->area[no][3]};
          i->roi [no][j] = r;
          i->size[no][j] = r.width * r.height;
        }

      if (i->flags[no] == GEGL_CL_BUFFER_READ)
        {
          for (j=0; j < i->n; j++)
            {
              gpointer data;

              /* un-tile */
              switch (i->conv[no])
                {
                  case GEGL_CL_COLOR_NOT_SUPPORTED:
                    {
                      gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]);

                      g_assert (i->tex_op[no][j] == NULL);
                      i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                              CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                                              i->size[no][j] * i->op_cl_format_size [no],
                                                              NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* pre-pinned memory */
                      data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
                                                     CL_MAP_WRITE,
                                                     0, i->size[no][j] * i->op_cl_format_size [no],
                                                     0, NULL, NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      /* color conversion using BABL */
                      gegl_buffer_get (i->buffer[no], &i->roi[no][j], 1.0, i->format[no], data, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);

                      cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data,
                                                             0, NULL, NULL);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      i->tex[no][j] = i->tex_op[no][j];

                      break;
                    }

                  case GEGL_CL_COLOR_EQUAL:
                  case GEGL_CL_COLOR_CONVERT:
                    {
                      /* both cases start from the data in the buffer's own
                       * format, preferably straight from the cache */
                      i->tex_buf[no][j] = gegl_buffer_cl_cache_get (i->buffer[no], &i->roi[no][j]);

                      if (i->tex_buf[no][j])
                        i->tex_buf_from_cache [no][j] = TRUE; /* don't free texture from cache */
                      else
                        {
                          gegl_buffer_cl_cache_flush (i->buffer[no], &i->roi[no][j]);

                          g_assert (i->tex_buf[no][j] == NULL);
                          i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                                   CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                                                   i->size[no][j] * i->buf_cl_format_size [no],
                                                                   NULL, &cl_err);
                          if (cl_err != CL_SUCCESS) CL_ERROR;

                          /* pre-pinned memory */
                          data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
                                                         CL_MAP_WRITE,
                                                         0, i->size[no][j] * i->buf_cl_format_size [no],
                                                         0, NULL, NULL, &cl_err);
                          if (cl_err != CL_SUCCESS) CL_ERROR;

                          /* color conversion will be performed in the GPU later */
                          gegl_buffer_get (i->buffer[no], &i->roi[no][j], 1.0, i->buffer[no]->soft_format, data, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);

                          cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
                                                                 0, NULL, NULL);
                          if (cl_err != CL_SUCCESS) CL_ERROR;
                        }

                      if (i->conv[no] == GEGL_CL_COLOR_CONVERT)
                        {
                          g_assert (i->tex_op[no][j] == NULL);
                          i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                                  CL_MEM_READ_WRITE,
                                                                  i->size[no][j] * i->op_cl_format_size [no],
                                                                  NULL, &cl_err);
                          if (cl_err != CL_SUCCESS) CL_ERROR;

                          /* color conversion in the GPU (input) */
                          g_assert (i->tex_buf[no][j] && i->tex_op[no][j]);
                          cl_err = gegl_cl_color_conv (i->tex_buf[no][j], i->tex_op[no][j], i->size[no][j],
                                                       i->buffer[no]->soft_format, i->format[no]);
                          if (cl_err == FALSE) CL_ERROR;

                          i->tex[no][j] = i->tex_op[no][j];
                        }
                      else
                        {
                          i->tex[no][j] = i->tex_buf[no][j];
                        }

                      break;
                    }
                }
            }

          /* Wait Processing */
          cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
          if (cl_err != CL_SUCCESS) CL_ERROR;
        }
      else if (i->flags[no] == GEGL_CL_BUFFER_WRITE)
        {
          for (j=0; j < i->n; j++)
            {
              switch (i->conv[no])
                {
                  case GEGL_CL_COLOR_NOT_SUPPORTED:
                    {
                      g_assert (i->tex_op[no][j] == NULL);
                      i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                              CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
                                                              i->size[no][j] * i->op_cl_format_size [no],
                                                              NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      i->tex[no][j] = i->tex_op[no][j];

                      break;
                    }

                  case GEGL_CL_COLOR_EQUAL:
                  case GEGL_CL_COLOR_CONVERT:
                    {
                      /* texture in the buffer's format; it may end up in the
                       * cache once the batch is completed */
                      g_assert (i->tex_buf[no][j] == NULL);
                      i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                               CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, /* cache */
                                                               i->size[no][j] * i->buf_cl_format_size [no],
                                                               NULL, &cl_err);
                      if (cl_err != CL_SUCCESS) CL_ERROR;

                      if (i->conv[no] == GEGL_CL_COLOR_CONVERT)
                        {
                          /* the operation writes here; converted into
                           * tex_buf when the batch is completed */
                          g_assert (i->tex_op[no][j] == NULL);
                          i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                                  CL_MEM_READ_WRITE,
                                                                  i->size[no][j] * i->op_cl_format_size [no],
                                                                  NULL, &cl_err);
                          if (cl_err != CL_SUCCESS) CL_ERROR;

                          i->tex[no][j] = i->tex_op[no][j];
                        }
                      else
                        {
                          i->tex[no][j] = i->tex_buf[no][j];
                        }

                      break;
                    }
                }
            }
        }
      else if (i->flags[no] == GEGL_CL_BUFFER_AUX)
        {
          for (j=0; j < i->n; j++)
            {
              g_assert (i->tex_op[no][j] == NULL);
              i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
                                                      CL_MEM_READ_WRITE,
                                                      i->size[no][j] * i->op_cl_format_size [no],
                                                      NULL, &cl_err);
              if (cl_err != CL_SUCCESS) CL_ERROR;

              i->tex[no][j] = i->tex_op[no][j];
            }
        }
    }

  i->roi_no += i->n;

  i->iteration_no++;

  if (result == FALSE)
    {
      /* nothing left: undo the locking and referencing done at setup */
      for (no=0; no<i->iterators;no++)
        {
          if (i->buffer[no])
            {
              gint prev;
              gboolean found = FALSE;
              for (prev=0; prev<no; prev++)
                if (i->buffer[no]==i->buffer[prev])
                  {
                    found = TRUE;
                    break;
                  }
              if (!found)
                gegl_buffer_unlock (i->buffer[no]);

              g_object_unref (i->buffer[no]);
            }
        }

      i->is_finished = TRUE;

      g_free (i->roi_all);
      g_slice_free (GeglBufferClIterators, i);
    }

  *err = FALSE;
  return result;

error:

  /* NOTE(review): the buffers stay locked and the iterator stays allocated
   * on this path; confirm how callers are expected to recover before
   * changing that. */
  for (no=0; no<i->iterators;no++)
    for (j=0; j < i->n; j++)
      {
        if (i->tex_buf[no][j])
          {
            /* textures borrowed from the cache are returned, not freed */
            if (i->tex_buf_from_cache [no][j])
              gegl_buffer_cl_cache_release (i->tex_buf[no][j]);
            else
              gegl_clReleaseMemObject (i->tex_buf[no][j]);
          }

        if (i->tex_op [no][j]) gegl_clReleaseMemObject (i->tex_op [no][j]);

        i->tex    [no][j] = NULL;
        i->tex_buf[no][j] = NULL;
        i->tex_op [no][j] = NULL;
        i->tex_buf_from_cache [no][j] = FALSE;
      }

  *err = TRUE;
  return FALSE;
}

/* Allocates a zero-filled iterator and registers @buffer as its first slot
 * via gegl_buffer_cl_iterator_add().  Returns the new iterator handle. */
GeglBufferClIterator *
gegl_buffer_cl_iterator_new (GeglBuffer          *buffer,
                             const GeglRectangle *roi,
                             const Babl          *format,
                             guint                flags,
                             GeglAbyssPolicy      abyss_policy)
{
  /* g_slice_new0 hands back zeroed memory, so is_finished already reads as
   * FALSE and needs no explicit initialisation. */
  GeglBufferClIterators *state  = g_slice_new0 (GeglBufferClIterators);
  GeglBufferClIterator  *handle = (gpointer) state;

  gegl_buffer_cl_iterator_add (handle, buffer, roi, format, flags, abyss_policy);

  return handle;
}

#undef CL_ERROR