/* -*- mode: c; c-file-style: "gnu" -*-
 * fabs.c -- file abstraction layer
 * Copyright (C) 2002, 2003, 2004 Gergely Nagy <algernon@bonehunter.rulez.org>
 *
 * This file is part of Thy.
 *
 * Thy is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 dated June, 1991.
 *
 * Thy is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
 * License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/** @file fabs.c
 * File abstraction layer.
 *
 * The file abstraction layer serves as a kind of cache. Since each
 * opened file adds a bit to the used resources, it makes sense to not
 * open a file twice. This is where FABS comes in: it presents a
 * wrapper around the standard open() and close() calls, which
 * remember if a file was opened already, and return the existing file
 * descriptor. Or open the file, if it is not already. Of course,
 * proper reference counting is done too.
 *
 * However, there is a hidden assumption here, namely that file
 * descriptors are assigned sequentially. If a system assigns them in
 * random order (especially if high numbers are used too), the kind of
 * storage used here breaks horribly.
 */

#include "system.h"

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <time.h>

#include "compat/compat.h"
#include "bh-libs/lru.h"

#include "config.h"
#include "fabs.h"
#include "http.h"
#include "thy.h"
#include "worker.h"

/** @internal Internal type for the file abstraction layer.
 * One slot of the fabs_fds table. The file descriptor itself is not
 * stored: the index of the slot inside fabs_fds is the descriptor
 * number. A slot is considered in use while its reference counter is
 * greater than zero.
 */
typedef struct
{
  char *file; /**< The name the file was opened under (heap copy). */
  int ref; /**< The reference counter. */
  size_t size; /**< Size recorded by fabs_mmap() when the file was mapped. */
  void *mmap_cache; /**< Address returned by mmap(); reset to NULL by fabs_munmap(). */
} fabs_t;

static int max; /**< @internal Highest numbered FD in the set; fabs_init() sets it to -1. */
/** @internal Static-size array of open file descriptors.
 * It is allocated once by fabs_init() with room for _THY_MAXCONN + 1
 * entries and is indexed by file descriptor number.
 */
static fabs_t *fabs_fds;

/** @internal An entry in the file cache.
 * Holds the original and resolved name of a file, and
 * optionally the result of stat()ing it. Either of canon and st may
 * be NULL: fabs_realpath() creates entries without st, fabs_stat()
 * creates entries without canon, and each fills in its own field on a
 * later hit.
 */
typedef struct
{
  char *orig; /**< Original file name; this is the lookup key. */
  char *canon; /**< Resolved file name, or NULL if not resolved yet. */
  struct stat *st; /**< stat() buffer filled by fabs_stat(), or NULL if not stat()ed yet. */
  time_t age; /**< Age of the entry. This is the last time it was
                 updated. */
} fabs_fcache_entry_t;

/** @internal An entry in the URL cache.
 * Holds the original URL and HOST, and the filename they map to.
 * The (url, host) pair is the lookup key.
 */
typedef struct
{
  char *url; /**< Original URL. */
  char *host; /**< Original host. */

  char *file; /**< Resolved filename, as returned by http_url_resolve(). */
  time_t age; /**< Age of the entry. This is the last time it was
                 updated. */
} fabs_ucache_entry_t;

/** @internal File meta-data cache.
 * This is an LRU cache of certain file meta-data, used for caching
 * real path lookups and stat() calls. Created by fabs_init().
 */
static void *fabs_fcache;
/** @internal URL-to-file cache.
 * This is an LRU cache of URL-to-file mappings, used to resolve URLs.
 * Created by fabs_init().
 */
static void *fabs_ucache;

/** @internal Cache entry expiration time.
 * fabs_init() initialises this to config->lru.expire, so the cache
 * functions do not have to call config_get() on every lookup. The
 * same value is used for both the file cache and the URL cache.
 */
static time_t fabs_fcache_expire = _THY_LRUEXPIRE;

/** @internal Cached GID, set from getgid() by fabs_init(). */
static gid_t fabs_gid = 0;
/** @internal Cached UID, set from getuid() by fabs_init(). */
static uid_t fabs_uid = 0;
/** @internal Number of bytes mmapped into memory.
 * Increased by fabs_mmap() and decreased by fabs_munmap().
 */
static size_t fabs_mmapped = 0;

/** @internal Release the members of a file cache entry.
 * This is the element destructor handed to bhl_lru_new() by
 * fabs_init(). It frees the strings and the stat buffer owned by the
 * entry; the entry structure itself is not freed here.
 *
 * @param ptr is the element whose members are to be freed. A NULL
 * pointer is accepted and ignored.
 */
static void
_fabs_fcache_free (void *ptr)
{
  fabs_fcache_entry_t *entry = ptr;

  if (entry != NULL)
    {
      free (entry->st);
      free (entry->canon);
      free (entry->orig);
    }
}

/** Initialise the FABS system.
 * Initialies the FD, URL mapping and file meta-data caches.
 */
void
fabs_init (void)
{
  const config_t *config = config_get ();

  max = -1;
  fabs_fds = (fabs_t *)bhc_calloc ((size_t)(_THY_MAXCONN + 1),
				   sizeof (fabs_t));
  fabs_fcache = bhl_lru_new (config->lru.fcache, _fabs_fcache_free);
  fabs_ucache = bhl_lru_new (config->lru.ucache, _fabs_fcache_free);

  fabs_fcache_expire = config->lru.expire;

  fabs_gid = getgid ();
  fabs_uid = getuid ();
}

/** Wrapper around the standard open() call.
 * "Open" FILE with FLAGS. First, it searches through fabs_fds, to see
 * if FILE is already open. If yes, return its descriptor (and
 * increment its reference counter). If not, then open it, place it in
 * the cache, and return the new fd.
 *
 * @param file is the file to open.
 * @param flags is the value to pass to open(). It is ignored if the
 * fiile is already open.
 *
 * @returns A file descriptor, or -1 on error.
 */
int
fabs_open (const char *file, int flags)
{
  int i;

  for (i = 0; i <= max; i++)
    {
      if (fabs_fds[i].ref > 0 && !strcmp (fabs_fds[i].file, file))
	{
	  fabs_fds[i].ref++;
	  return i;
	}
    }

  if ((i = open (file, O_RDONLY | flags)) != -1)
    {
      fabs_fds[i].ref = 1;
      fabs_fds[i].file = bhc_strdup (file);
      max = i;

      thy_worker_register (i, file);
    }

  return i;
}

/** Close a file descriptor.
 * Decrement its reference counter, and if it hits zero, close it, and
 * remove it from the cache.
 *
 * @returns Zero on success, or -1 if an error occurred.
 */
int
fabs_close (int fd)
{
  fabs_fds[fd].ref--;

  if (fd < 0 || max < 0)
    return 0;

  if (fabs_fds[fd].ref == 0)
    {
      int i;

      free (fabs_fds[fd].file);
      fabs_fds[fd].file = NULL;

      for (i = max; i >= 0; i--)
	{
	  if (fabs_fds[i].ref > 0)
	    {
	      max = i;
	      break;
	    }
	}

      thy_worker_unregister (fd);

      return close (fd);
    }

  return 0;
}

/** Map a file descriptor into memory.
 * Maps a file descriptor into memory, using mmap() if
 * available. Otherwise this is a no-op.
 *
 * Nothing is mapped (and nothing is recorded) when FD is negative,
 * when fstat() fails, when the file is empty, when the configured
 * mmap limit (config->limits.mmap, zero meaning unlimited) would be
 * reached, or when mmap() itself fails.
 *
 * @param fd is the file descriptor to map into memory.
 */
void
fabs_mmap (int fd)
{
#if HAVE_MMAP
  struct stat st;
  const config_t *config = config_get ();
  void *map;

  if (fd < 0)
    return;

  if (fstat (fd, &st) != 0)
    return;

  /* mmap() fails for zero-length mappings, so there is nothing to
     cache for an empty file. */
  if (st.st_size <= 0)
    return;

  if (config->limits.mmap != 0 &&
      st.st_size + fabs_mmapped >= config->limits.mmap)
    return;

  map = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);

  /* On failure mmap() returns MAP_FAILED, not NULL. Do not record it:
     fabs_munmap() only tests for NULL and would otherwise try to
     unmap the bogus address and subtract bytes that were never
     mapped. */
  if (map == MAP_FAILED)
    return;

  /* NOTE(review): a mapping already recorded for this slot is
     overwritten without being unmapped, as before -- confirm whether
     callers may map the same descriptor more than once. */
  fabs_mmapped += st.st_size;
  fabs_fds[fd].size = st.st_size;
  fabs_fds[fd].mmap_cache = map;
#endif
}

/** Unmap a file descriptor from memory.
 * Releases the mapping recorded for FD by fabs_mmap(), using
 * munmap(), and subtracts its size from the mapped byte counter. If
 * nothing is recorded for FD (zero size or NULL address), or FD is
 * negative, this does nothing. Without mmap() support this is a
 * no-op.
 *
 * @param fd is the file descriptor to unmap from memory.
 */
void
fabs_munmap (int fd)
{
#if HAVE_MMAP
  fabs_t *slot;

  if (fd < 0)
    return;

  slot = &fabs_fds[fd];
  if (slot->size == 0 || slot->mmap_cache == NULL)
    return;

  fabs_mmapped -= slot->size;
  munmap (slot->mmap_cache, slot->size);

  /* Mark the slot as unmapped. */
  slot->mmap_cache = NULL;
  slot->size = 0;
#endif
}

/** @internal Comparison callback for the file cache.
 * Passed to bhl_lru_find() by fabs_realpath() and fabs_stat(); it
 * decides whether a cache element is the one being looked up by
 * comparing its original file name with the searched name.
 *
 * @param data is the current element in the cache (a
 * fabs_fcache_entry_t).
 * @param size is its size (unused).
 * @param fdata is the file name we are searching for, as a
 * NUL-terminated string.
 * @param fsize is its size (unused).
 *
 * @returns One if they match, zero otherwise.
 */
static int
_fabs_fcache_find (const void *data, size_t size,
                   const void *fdata, size_t fsize)
{
  const fabs_fcache_entry_t *entry = data;
  const char *wanted = fdata;

  return strcmp (entry->orig, wanted) == 0;
}

/** @internal Comparison callback for the URL cache.
 * Passed to bhl_lru_find() by fabs_urlmap(); it decides whether a
 * cache element is the one being looked up. Two entries match only
 * if both their URLs and their hosts are equal strings. If any of
 * the four strings is NULL, the entries do not match.
 *
 * @param data is the current element in the cache (a
 * fabs_ucache_entry_t).
 * @param size is its size (unused).
 * @param fdata is the element we are searching for (a
 * fabs_ucache_entry_t of which only url and host are read).
 * @param fsize is its size (unused).
 *
 * @returns One if they match, zero otherwise.
 */
static int
_fabs_ucache_find (const void *data, size_t size,
                   const void *fdata, size_t fsize)
{
  const fabs_ucache_entry_t *entry = data;
  const fabs_ucache_entry_t *key = fdata;

  /* A missing URL or host on either side can never match. */
  if (!entry->url || !key->url || !entry->host || !key->host)
    return 0;

  return strcmp (entry->url, key->url) == 0
    && strcmp (entry->host, key->host) == 0;
}

/** Determine a file's canonical name.
 * First searches the LRU cache if the original file is stored there,
 * then, if not found, calls canonicalize_file_name() and adds the
 * result to the cache. An entry that exists but has no canonical
 * name yet (created by fabs_stat()) gets it filled in. An entry that
 * is older than the expiration time is served one last time and then
 * removed from the cache.
 *
 * The main use of this function is to reduce the number of filesystem
 * accesses.
 *
 * @param file is the file to canonicalize.
 *
 * @returns A newly allocated string with the canonicalized filename,
 * or NULL on error (FILE is NULL or it cannot be canonicalized).
 *
 * @note The returned string must be freed by the caller!
 * @note As a side-effect, this modifies #fabs_fcache.
 */
char *
fabs_realpath (const char *file)
{
  fabs_fcache_entry_t *res;
  char *canon;

  if (!file)
    return NULL;

  res = bhl_lru_find (fabs_fcache, _fabs_fcache_find, file, 0);
  if (!res)
    {
      /* Cache miss: resolve the name and remember it. Failures are
         not cached. */
      canon = canonicalize_file_name (file);
      if (!canon)
        return NULL;

      res = (fabs_fcache_entry_t *)
        bhc_malloc (sizeof (fabs_fcache_entry_t));
      res->orig = bhc_strdup (file);
      res->canon = canon;
      res->st = NULL;
      res->age = time (NULL);

      bhl_lru_add (fabs_fcache, res, sizeof (fabs_fcache_entry_t));

      return bhc_strdup (canon);
    }

  if (!res->canon)
    {
      /* The entry was created without a canonical name; fill it in. */
      res->canon = canonicalize_file_name (file);
      res->age = time (NULL);

      /* Resolution can fail here just like on a cache miss; report
         it instead of duplicating a NULL string. */
      if (!res->canon)
        return NULL;
    }
  else if (res->age + fabs_fcache_expire < time (NULL))
    {
      /* Expired: hand out the cached value once more, then drop the
         entry so that the next lookup resolves it afresh. The copy
         must be taken before the entry is deleted. */
      canon = bhc_strdup (res->canon);
      bhl_lru_delete (fabs_fcache, res);
      return canon;
    }

  return bhc_strdup (res->canon);
}

/** Determine the file corresponding to an URL.
 * First searches the URL cache for the (URL, HOST) pair, then, if it
 * is not found, calls http_url_resolve() and adds the result to the
 * cache. A cached entry that is older than the expiration time is
 * served one last time and then removed from the cache.
 *
 * The main use of this function is to reduce the number of filesystem
 * accesses.
 *
 * @param url is the URL to resolve.
 * @param host is the host the URL is requested from.
 * @param absuri is the absolute URI. It is only passed on to
 * http_url_resolve(); it is not part of the cache key.
 *
 * @returns A newly allocated string with the resolved filename,
 * or NULL if http_url_resolve() failed.
 *
 * @note The returned string must be freed by the caller!
 * @note As a side-effect, this modifies #fabs_ucache.
 */
char *
fabs_urlmap (const char *url, const char *host, const char *absuri)
{
  fabs_ucache_entry_t key;
  fabs_ucache_entry_t *hit;
  fabs_ucache_entry_t *fresh;
  char *mapped;

  /* Build a temporary search key; only url and host are compared. */
  key.url = bhc_strdup (url);
  key.host = bhc_strdup (host);
  hit = bhl_lru_find (fabs_ucache, _fabs_ucache_find, &key, 0);
  free (key.url);
  free (key.host);

  if (hit)
    {
      /* Copy the answer first: an expired entry is deleted below, and
         its strings go away with it. */
      mapped = bhc_strdup (hit->file);
      if (hit->age + fabs_fcache_expire < time (NULL))
        bhl_lru_delete (fabs_ucache, hit);
      return mapped;
    }

  /* Cache miss: resolve the URL and remember the result. */
  mapped = http_url_resolve (url, host, absuri);
  if (!mapped)
    return NULL;

  fresh = (fabs_ucache_entry_t *)
    bhc_malloc (sizeof (fabs_ucache_entry_t));
  fresh->url = bhc_strdup (url);
  fresh->host = bhc_strdup (host);
  fresh->file = mapped;
  fresh->age = time (NULL);

  bhl_lru_add (fabs_ucache, fresh, sizeof (fabs_ucache_entry_t));

  return bhc_strdup (fresh->file);
}

/** stat() a file.
 * First searches the LRU cache if the original file is stored there,
 * then, if not found, calls stat() and adds the result to the cache.
 * A failed stat() is cached too, as an all-zero buffer. An entry that
 * exists but has no stat buffer yet (created by fabs_realpath()) gets
 * one filled in.
 *
 * The main use of this function is to reduce the number of filesystem
 * accesses.
 *
 * @param file is the file to stat().
 * @param st is the result buffer. It is zero-filled when FILE is NULL
 * or when the stat() call made by this function fails.
 *
 * @returns Zero on success, -1 if FILE is NULL or the stat() call made
 * by this function failed.
 *
 * @note A cached entry that has expired, or whose link count is zero
 * (which includes cached failures), is copied to ST, removed from the
 * cache, and zero is returned. Callers that need to tell a cached
 * failure apart have to check st_nlink, as fabs_access() does.
 * @note As a side-effect, this modifies #fabs_fcache.
 */
int
fabs_stat (const char *file, struct stat *st)
{
  fabs_fcache_entry_t *res;
  int i = 0;
  struct stat *stb;

  if (!file)
    {
      memset (st, 0, sizeof (struct stat));
      return -1;
    }

  res = bhl_lru_find (fabs_fcache, _fabs_fcache_find, file, 0);
  if (!res)
    {
      /* Cache miss: stat() the file and remember the outcome, zeroed
         on failure. */
      stb = (struct stat *)bhc_malloc (sizeof (struct stat));
      i = stat (file, stb);
      if (i != 0)
        memset (stb, 0, sizeof (struct stat));

      res = (fabs_fcache_entry_t *)
        bhc_malloc (sizeof (fabs_fcache_entry_t));
      res->orig = bhc_strdup (file);
      res->canon = NULL;
      res->st = stb;
      res->age = time (NULL);

      bhl_lru_add (fabs_fcache, res, sizeof (fabs_fcache_entry_t));
    }
  else if (!res->st)
    {
      /* The entry was created without a stat buffer; fill it in. */
      res->st = (struct stat *)bhc_malloc (sizeof (struct stat));
      res->age = time (NULL);
      i = stat (file, res->st);

      /* The buffer comes straight from the allocator: on failure it
         must be cleared, exactly like on a cache miss, or garbage
         would be cached and handed to the caller. */
      if (i != 0)
        memset (res->st, 0, sizeof (struct stat));
    }
  else if (res->age + fabs_fcache_expire < time (NULL) ||
           res->st->st_nlink == 0)
    {
      /* Expired (or a cached failure): serve the cached value once
         more, then drop the entry. The copy must be taken before the
         entry is deleted. */
      memcpy (st, res->st, sizeof (struct stat));
      bhl_lru_delete (fabs_fcache, res);
      return 0;
    }

  *st = *res->st;
  return i;
}

/** Check if a file is accessible.
 * This is a simple wrapper around fabs_stat() that inspects the
 * permission bits of the file. Access is granted if any of these
 * holds for the requested mode:
 *  - the cached UID owns the file and the owner bit is set;
 *  - the cached GID is the group of the file and the group bit is set;
 *  - the bit for others is set.
 *
 * @param file is the file to check.
 * @param mode is the access mode (see access(2)). Only F_OK, R_OK
 * and X_OK are supported, one at a time; anything else is refused.
 *
 * @returns Zero on success, -1 otherwise (including when the file
 * cannot be stat()ed or has a link count of zero).
 */
int
fabs_access (const char *file, int mode)
{
  struct stat info;
  mode_t usr_bit, grp_bit, oth_bit;

  if (fabs_stat (file, &info) != 0 || info.st_nlink == 0)
    return -1;

  /* Existence is all that was asked for. */
  if (mode == F_OK)
    return 0;

  if (mode == R_OK)
    {
      usr_bit = S_IRUSR;
      grp_bit = S_IRGRP;
      oth_bit = S_IROTH;
    }
  else if (mode == X_OK)
    {
      usr_bit = S_IXUSR;
      grp_bit = S_IXGRP;
      oth_bit = S_IXOTH;
    }
  else
    {
      /* We don't handle W_OK, as Thy will never ever call us with
         that flag. */
      return -1;
    }

  /* The classes are tried from the most specific to the least
     specific one; a missing bit in one class falls through to the
     next instead of denying access outright. */
  if (fabs_uid == info.st_uid && (info.st_mode & usr_bit))
    return 0;

  if (fabs_gid == info.st_gid && (info.st_mode & grp_bit))
    return 0;

  return (info.st_mode & oth_bit) ? 0 : -1;
}
