/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

#include <errno.h>
#include <sys/mman.h>

#include "drm-uapi/v3d_drm.h"
#include "util/u_memory.h"

/* Default max size of the bo cache, in MB. It is used when the
 * V3DV_MAX_BO_CACHE_SIZE environment variable is not set.
 *
 * FIXME: we got this value when testing some apps on the rpi4 with 4GB of
 * RAM, but it should depend on the total amount of RAM. For that we would
 * need to test on real hw with different amounts of RAM. Using this value
 * for now.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 512

/* Compile-time switch for the BO statistics dumps that this file prints to
 * stderr.
 *
 * Using a V3D_DEBUG option for this was considered and discarded, as it would
 * mean adding a run-time check to most of the calls.
 */
static const bool dump_stats = false;

/* Prints the device BO counters and the state of the BO cache to stderr.
 *
 * The output includes the number and total size of the allocated BOs, the
 * number and total size of the cached BOs, the free times of the first and
 * last entries of the cache time list (when it is not empty) together with
 * the current CLOCK_MONOTONIC time, and how many of the per-size cache lists
 * are empty.
 */
static void
bo_dump_stats(struct v3dv_device *device)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   fprintf(stderr, "  BOs allocated:   %d\n", device->bo_count);
   fprintf(stderr, "  BOs size:        %dkb\n", device->bo_size / 1024);
   fprintf(stderr, "  BOs cached:      %d\n", cache->cache_count);
   fprintf(stderr, "  BOs cached size: %dkb\n", cache->cache_size / 1024);

   if (!list_is_empty(&cache->time_list)) {
      struct v3dv_bo *first = list_first_entry(&cache->time_list,
                                              struct v3dv_bo,
                                              time_list);
      struct v3dv_bo *last = list_last_entry(&cache->time_list,
                                            struct v3dv_bo,
                                            time_list);

      fprintf(stderr, "  oldest cache time: %ld\n",
              (long)first->free_time);
      fprintf(stderr, "  newest cache time: %ld\n",
              (long)last->free_time);

      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      /* time_t is not guaranteed to be a long, so cast it explicitly to
       * match the %ld conversion, like the two free times above.
       */
      fprintf(stderr, "  now:               %ld\n",
              (long)time.tv_sec);
   }

   if (cache->size_list_size) {
      uint32_t empty_size_list = 0;
      for (uint32_t i = 0; i < cache->size_list_size; i++) {
         if (list_is_empty(&cache->size_list[i]))
            empty_size_list++;
      }
      fprintf(stderr, "  Empty size_list lists: %u\n", empty_size_list);
   }
}

/* Takes a BO out of the cache: it is unlinked from both the per-size list and
 * the time list, and the cache counters are adjusted accordingly. The BO
 * itself is not freed.
 */
static void
bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
{
   /* Accounting */
   cache->cache_size -= bo->size;
   cache->cache_count--;

   /* Unlink from both cache lists */
   list_del(&bo->size_list);
   list_del(&bo->time_list);
}

/* Tries to get a BO of exactly `size` bytes from the cache.
 *
 * Returns the BO, removed from the cache and with its name set to `name`, or
 * NULL if the cache has no list for that size, the list is empty, or the
 * first BO in the list is still busy.
 */
static struct v3dv_bo *
bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   const uint32_t bucket = size / 4096 - 1;

   if (bucket >= cache->size_list_size)
      return NULL;

   struct v3dv_bo *result = NULL;
   struct v3dv_bo *candidate;

   mtx_lock(&cache->lock);

   if (list_is_empty(&cache->size_list[bucket]))
      goto out;

   candidate = list_first_entry(&cache->size_list[bucket],
                                struct v3dv_bo, size_list);

   /* Only reuse the BO if it has already gone idle. Otherwise we prefer a
    * new allocation, since we assume that the user will proceed to CPU map
    * it and fill it with stuff.
    */
   if (!v3dv_bo_wait(device, candidate, 0))
      goto out;

   bo_remove_from_cache(cache, candidate);
   candidate->name = name;
   result = candidate;

out:
   mtx_unlock(&cache->lock);
   return result;
}

static bool
bo_free(struct v3dv_device *device,
        struct v3dv_bo *bo)
{
   if (!bo)
      return true;

   if (bo->map)
      v3dv_bo_unmap(device, bo);

   struct drm_gem_close c;
   memset(&c, 0, sizeof(c));
   c.handle = bo->handle;
   int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
   if (ret != 0)
      fprintf(stderr, "close object %d: %s\n", bo->handle, strerror(errno));

   device->bo_count--;
   device->bo_size -= bo->size;

   if (dump_stats) {
      fprintf(stderr, "Freed %s%s%dkb:\n",
              bo->name ? bo->name : "",
              bo->name ? " " : "",
              bo->size / 1024);
      bo_dump_stats(device);
   }

   vk_free(&device->vk.alloc, bo);

   return ret == 0;
}

/* Empties the BO cache, destroying every cached BO.
 *
 * `with_lock` selects whether the cache lock is taken here; pass false when
 * the caller already holds it.
 */
static void
bo_cache_free_all(struct v3dv_device *device,
                       bool with_lock)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   if (with_lock)
      mtx_lock(&cache->lock);

   /* Every cached BO is linked in the time list, so drain it from the head */
   while (!list_is_empty(&cache->time_list)) {
      struct v3dv_bo *cached = list_first_entry(&cache->time_list,
                                                struct v3dv_bo,
                                                time_list);
      bo_remove_from_cache(cache, cached);
      bo_free(device, cached);
   }

   if (with_lock)
      mtx_unlock(&cache->lock);
}

/* Fills in the fields of an already allocated BO struct.
 *
 * The BO starts unmapped (no map pointer, map size 0), with dumb_handle set
 * to -1 and with list_link initialized as an empty list head. handle_bit is
 * a 64-bit mask with the single bit (handle modulo 64) set.
 */
void
v3dv_bo_init(struct v3dv_bo *bo,
             uint32_t handle,
             uint32_t size,
             uint32_t offset,
             const char *name,
             bool private)
{
   /* Identity */
   bo->handle = handle;
   bo->handle_bit = 1ull << (handle & 63);
   bo->name = name;
   bo->private = private;
   bo->dumb_handle = -1;

   /* Placement */
   bo->size = size;
   bo->offset = offset;

   /* CPU mapping */
   bo->map = NULL;
   bo->map_size = 0;

   list_inithead(&bo->list_link);
}

/* Allocates a BO of `size` bytes, rounded up to a multiple of 4096.
 *
 * If `private` is true, the BO cache is checked first and a cached BO of the
 * same (rounded) size is returned when one is available. Otherwise a new BO
 * is created through the CREATE_BO ioctl. If that ioctl fails while the cache
 * is not empty, the cache is emptied and the ioctl is retried once.
 *
 * Returns the BO, or NULL if either the host allocation or the device
 * allocation failed.
 */
struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device *device,
              uint32_t size,
              const char *name,
              bool private)
{
   struct v3dv_bo *bo;

   const uint32_t page_align = 4096; /* Always allocate full pages */
   size = align(size, page_align);

   if (private) {
      bo = bo_from_cache(device, size, name);
      if (bo) {
         if (dump_stats) {
            fprintf(stderr, "Allocated %s %dkb from cache:\n",
                    name, size / 1024);
            bo_dump_stats(device);
         }
         return bo;
      }
   }

   bo = vk_alloc(&device->vk.alloc, sizeof(struct v3dv_bo), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!bo) {
      fprintf(stderr, "Failed to allocate host memory for BO\n");
      return NULL;
   }

   /* This flag has to live outside of the retry section: if it were
    * initialized after the label it would be reset on every retry, and the
    * "retry only once" check below would never trigger.
    */
   bool cleared_and_retried = false;

 retry:
   ;

   /* The ioctl arguments are rebuilt from scratch on each attempt */
   struct drm_v3d_create_bo create = {
      .size = size
   };

   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_CREATE_BO, &create);
   if (ret != 0) {
      if (!list_is_empty(&device->bo_cache.time_list) &&
          !cleared_and_retried) {
         cleared_and_retried = true;
         bo_cache_free_all(device, true);
         goto retry;
      }

      vk_free(&device->vk.alloc, bo);
      fprintf(stderr, "Failed to allocate device memory for BO\n");
      return NULL;
   }

   assert(create.offset % page_align == 0);
   assert((create.offset & 0xffffffff) == create.offset);

   v3dv_bo_init(bo, create.handle, size, create.offset, name, private);

   device->bo_count++;
   device->bo_size += bo->size;
   if (dump_stats) {
      fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
      bo_dump_stats(device);
   }

   return bo;
}

/* Maps the first `size` bytes of the BO into the CPU address space, without
 * waiting for the BO to be idle.
 *
 * If the BO is already mapped the existing mapping is kept as it is and true
 * is returned. On success bo->map and bo->map_size describe the new mapping.
 * On failure false is returned and the BO is left unmapped (bo->map stays
 * NULL).
 */
bool
v3dv_bo_map_unsynchronized(struct v3dv_device *device,
                           struct v3dv_bo *bo,
                           uint32_t size)
{
   assert(bo != NULL && size <= bo->size);

   if (bo->map)
      return true;

   struct drm_v3d_mmap_bo map;
   memset(&map, 0, sizeof(map));
   map.handle = bo->handle;
   int ret = v3dv_ioctl(device->pdevice->render_fd,
                        DRM_IOCTL_V3D_MMAP_BO, &map);
   if (ret != 0) {
      fprintf(stderr, "map ioctl failure\n");
      return false;
   }

   /* Don't store the result in the BO until we know it is valid: MAP_FAILED
    * is not NULL, so it would make the BO look mapped, both to later map
    * requests and to the unmap done when the BO is freed.
    */
   void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    device->pdevice->render_fd, map.offset);
   if (ptr == MAP_FAILED) {
      fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
              bo->handle, (long long)map.offset, (uint32_t)bo->size);
      return false;
   }

   bo->map = ptr;
   VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));

   bo->map_size = size;

   return true;
}

bool
v3dv_bo_wait(struct v3dv_device *device,
             struct v3dv_bo *bo,
             uint64_t timeout_ns)
{
   struct drm_v3d_wait_bo wait = {
      .handle = bo->handle,
      .timeout_ns = timeout_ns,
   };
   return v3dv_ioctl(device->pdevice->render_fd,
                     DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
}

/* Maps the first `size` bytes of the BO and then waits, with an infinite
 * timeout, on the BO.
 *
 * Returns false if either the mapping or the wait failed. Note that the BO
 * stays mapped if only the wait failed.
 */
bool
v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
{
   assert(bo && size <= bo->size);

   if (!v3dv_bo_map_unsynchronized(device, bo, size))
      return false;

   if (v3dv_bo_wait(device, bo, PIPE_TIMEOUT_INFINITE))
      return true;

   fprintf(stderr, "memory wait for map failed\n");
   return false;
}

/* Removes the CPU mapping of a BO and resets its map pointer and map size.
 *
 * The BO must be currently mapped.
 */
void
v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
{
   assert(bo && bo->map && bo->map_size > 0);

   void *mapping = bo->map;

   munmap(mapping, bo->map_size);
   VG(VALGRIND_FREELIKE_BLOCK(mapping, 0));

   bo->map_size = 0;
   bo->map = NULL;
}

/* Grows the array of per-size cache lists to `size` entries.
 *
 * The lists already present are moved over to the new array and the new
 * entries are initialized as empty lists. `size` is expected to be larger
 * than the current number of entries. The only caller in this file invokes
 * it with the cache lock held.
 *
 * Returns false, leaving the cache untouched, if the new array could not be
 * allocated.
 */
static bool
reallocate_size_list(struct v3dv_bo_cache *cache,
                     struct v3dv_device *device,
                     uint32_t size)
{
   struct list_head *new_list =
      vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!new_list) {
      fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
      return false;
   }
   struct list_head *old_list = cache->size_list;

   /* Move old list contents over (since the array has moved, and
    * therefore the pointers to the list heads have to change).
    */
   for (uint32_t i = 0; i < cache->size_list_size; i++) {
      struct list_head *old_head = &cache->size_list[i];
      if (list_is_empty(old_head)) {
         list_inithead(&new_list[i]);
      } else {
         /* Splice the new head in place of the old one: take over its
          * neighbours and make them point back to the new head.
          */
         new_list[i].next = old_head->next;
         new_list[i].prev = old_head->prev;
         new_list[i].next->prev = &new_list[i];
         new_list[i].prev->next = &new_list[i];
      }
   }
   for (uint32_t i = cache->size_list_size; i < size; i++)
      list_inithead(&new_list[i]);

   cache->size_list = new_list;
   cache->size_list_size = size;
   vk_free(&device->vk.alloc, old_list);

   return true;
}

/* Initializes the device BO counters and the BO cache.
 *
 * The max size of the cache is taken, in MB, from the V3DV_MAX_BO_CACHE_SIZE
 * environment variable. If the variable is not set, or its value is not a
 * non-negative number, DEFAULT_MAX_BO_CACHE_SIZE is used.
 */
void
v3dv_bo_cache_init(struct v3dv_device *device)
{
   device->bo_size = 0;
   device->bo_count = 0;
   list_inithead(&device->bo_cache.time_list);
   /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
    * reallocations
    */
   device->bo_cache.size_list_size = 0;
   /* v3dv_bo_cache_destroy frees this pointer unconditionally, so it needs a
    * defined value even if no BO ever gets cached.
    */
   device->bo_cache.size_list = NULL;

   long long max_cache_size_mb = DEFAULT_MAX_BO_CACHE_SIZE;
   const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
   if (max_cache_size_str != NULL) {
      char *end = NULL;
      errno = 0;
      long long parsed = strtoll(max_cache_size_str, &end, 10);
      if (errno == 0 && end != max_cache_size_str && parsed >= 0) {
         max_cache_size_mb = parsed;
      } else {
         fprintf(stderr, "Ignoring invalid V3DV_MAX_BO_CACHE_SIZE value, "
                 "using the default\n");
      }
   }
   device->bo_cache.max_cache_size = max_cache_size_mb;

   if (dump_stats) {
      fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
   }

   /* From here on the max cache size is handled in bytes.
    *
    * NOTE(review): a large enough value in MB can overflow here, depending
    * on the type of max_cache_size -- confirm against its declaration.
    */
   device->bo_cache.max_cache_size *= 1024 * 1024;
   device->bo_cache.cache_count = 0;
   device->bo_cache.cache_size = 0;
}

/* Destroys the BO cache: every cached BO is freed and then the array of
 * per-size lists is released.
 */
void
v3dv_bo_cache_destroy(struct v3dv_device *device)
{
   bo_cache_free_all(device, true);

   /* The stats dump walks the per-size lists, so it has to be done while
    * that array is still allocated.
    */
   if (dump_stats) {
      fprintf(stderr, "BO stats after screen destroy:\n");
      bo_dump_stats(device);
   }

   vk_free(&device->vk.alloc, device->bo_cache.size_list);
   /* Don't leave a dangling pointer and a stale size behind */
   device->bo_cache.size_list = NULL;
   device->bo_cache.size_list_size = 0;
}


/* Frees the cached BOs that are stale at the given time (in seconds).
 *
 * The time list is walked from its first entry and the walk stops at the
 * first BO that is not stale. All the callers in this file hold the cache
 * lock when calling this.
 */
static void
free_stale_bos(struct v3dv_device *device,
               time_t time)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   bool reported = false;

   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
                            time_list) {
      /* A BO is stale once it has been in the cache for more than 2
       * seconds. Stop at the first one that isn't.
       */
      if (time - bo->free_time <= 2)
         break;

      if (dump_stats && !reported) {
         fprintf(stderr, "Freeing stale BOs:\n");
         bo_dump_stats(device);
         reported = true;
      }

      bo_remove_from_cache(cache, bo);
      bo_free(device, bo);
   }

   if (dump_stats && reported) {
      fprintf(stderr, "Freed stale BOs:\n");
      bo_dump_stats(device);
   }
}

/* Releases a BO, keeping it in the BO cache for later reuse when possible.
 *
 * Only private BOs that fit in the remaining cache budget are cached;
 * anything else is destroyed right away. Before giving up on caching a
 * private BO that doesn't fit, the stale BOs are evicted to try to make
 * room for it.
 *
 * Returns true if `bo` is NULL or the BO was cached, and the result of
 * destroying the BO otherwise.
 */
bool
v3dv_bo_free(struct v3dv_device *device,
             struct v3dv_bo *bo)
{
   if (bo == NULL)
      return true;

   struct v3dv_bo_cache *cache = &device->bo_cache;
   const uint32_t bucket = bo->size / 4096 - 1;
   struct timespec now;

   /* No room for this BO right now: see if dropping stale BOs helps */
   if (bo->private &&
       bo->size > cache->max_cache_size - cache->cache_size) {
      clock_gettime(CLOCK_MONOTONIC, &now);
      mtx_lock(&cache->lock);
      free_stale_bos(device, now.tv_sec);
      mtx_unlock(&cache->lock);
   }

   if (!bo->private)
      return bo_free(device, bo);

   if (bo->size > cache->max_cache_size - cache->cache_size)
      return bo_free(device, bo);

   clock_gettime(CLOCK_MONOTONIC, &now);
   mtx_lock(&cache->lock);

   if (bucket >= cache->size_list_size &&
       !reallocate_size_list(cache, device, bucket + 1)) {
      const bool freed = bo_free(device, bo);
      /* A failed reallocation usually means that we are out of memory, so
       * the whole cache is dropped too. We already hold the cache lock, so
       * ask for it not to be taken again.
       */
      bo_cache_free_all(device, false);
      mtx_unlock(&cache->lock);
      return freed;
   }

   /* Link the BO at the tail of both the time list and its size list */
   bo->free_time = now.tv_sec;
   list_addtail(&bo->time_list, &cache->time_list);
   list_addtail(&bo->size_list, &cache->size_list[bucket]);

   cache->cache_size += bo->size;
   cache->cache_count++;

   if (dump_stats) {
      fprintf(stderr, "Freed %s %dkb to cache:\n",
              bo->name, bo->size / 1024);
      bo_dump_stats(device);
   }
   bo->name = NULL;

   free_stale_bos(device, now.tv_sec);

   mtx_unlock(&cache->lock);

   return true;
}
