/*
 * Copyright (C) 2010 Canonical, Ltd.
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * version 3.0 as published by the Free Software Foundation.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License version 3.0 for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Authored by:
 *               Mikkel Kamstrup Erlandsen <mikkel.kamstrup@canonical.com>
 */

/**
 * SECTION:dee-analyzers
 * @title: Analyzers
 * @short_description: A suite of simple #DeeAnalyzers for common operations
 * @include: dee.h
 *
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "dee-analyzers.h"
#include "trace-log.h"

/*
 * FORWARDS
 */
/* Analyzer callback: adds the string stored in one column of the row to
 * out_terms as a single term. userdata carries the column index, read back
 * with GPOINTER_TO_UINT(). Defined further down in this file. */
static void _dee_analyze_key_func       (DeeModel     *model,
                                         DeeModelIter *iter,
                                         DeeTermList  *out_terms,
                                         gpointer      userdata);

/* Analyzer callback: splits the string stored in one column of the row on
 * non-alphanumeric characters, normalizes and lowercases each piece, and
 * adds the pieces to out_terms. userdata carries the column index, read back
 * with GPOINTER_TO_UINT(). Defined further down in this file. */
static void _dee_analyze_full_text_func    (DeeModel     *model,
                                            DeeModelIter *iter,
                                            DeeTermList  *out_terms,
                                            gpointer      userdata);

/*
 * ANALYZER FUNCS
 */
/*
 * Analyzer callback treating a whole column value as one term.
 *
 * Looks up the string held in the column whose index is stored in @userdata
 * (decoded with GPOINTER_TO_UINT()) for the row at @iter, and hands that
 * string unmodified to @out_terms.
 */
static void
_dee_analyze_key_func       (DeeModel     *model,
                             DeeModelIter *iter,
                             DeeTermList  *out_terms,
                             gpointer      userdata)
{
  dee_term_list_add_term (out_terms,
                          dee_model_get_string (model,
                                                iter,
                                                GPOINTER_TO_UINT (userdata)));
}

/*
 * Analyzer callback doing a simple full text analysis of a string column.
 *
 * Reads the string from the column whose index is stored in @userdata
 * (decoded with GPOINTER_TO_UINT()) for the row at @iter. The string is
 * split into maximal runs of alphanumeric characters, as decided by
 * g_unichar_isalnum(). Each run is passed through g_utf8_normalize() with
 * G_NORMALIZE_ALL_COMPOSE and then g_utf8_strdown(), and the result is added
 * to @out_terms in the order the runs appear in the string.
 *
 * Runs of separator characters never produce a term, so leading, trailing
 * or repeated separators do not add empty terms to @out_terms.
 *
 * If the string is not valid UTF-8 a warning is logged and no terms are
 * added.
 */
static void
_dee_analyze_full_text_func    (DeeModel     *model,
                                DeeModelIter *iter,
                                DeeTermList  *out_terms,
                                gpointer      userdata)
{
  const gchar *val, *p, *term_start, *end;
  gchar       *normalized, *lowered;

  val = dee_model_get_string (model, iter, GPOINTER_TO_UINT (userdata));

  /* NOTE(review): whether dee_model_get_string() can return NULL is not
   * visible from this file. Guard anyway, since g_utf8_validate() would
   * dereference the pointer. There is nothing to analyze in that case. */
  if (val == NULL)
    return;

  /* On success 'end' points at the terminating NUL of val */
  if (!g_utf8_validate (val, -1, &end))
    {
      g_warning ("Unable to analyze invalid UTF-8: %s", val);
      return;
    }

  p = val;
  while (p != end)
    {
      /* Skip any run of separators (non-alphanumeric characters) */
      while (p != end && !g_unichar_isalnum (g_utf8_get_char (p)))
        p = g_utf8_next_char (p);

      /* Consume one run of alphanumeric characters */
      term_start = p;
      while (p != end && g_unichar_isalnum (g_utf8_get_char (p)))
        p = g_utf8_next_char (p);

      /* Only separators were left before the end of the string */
      if (p == term_start)
        continue;

      /* g_utf8_normalize() takes a byte length, so the term can be
       * normalized straight out of val without copying it first */
      normalized = g_utf8_normalize (term_start, p - term_start,
                                     G_NORMALIZE_ALL_COMPOSE);
      lowered = g_utf8_strdown (normalized, -1);

      dee_term_list_add_term (out_terms, lowered);

      g_free (lowered);
      g_free (normalized);
    }
}

/*
 * Analyzer callback for signed integer columns.
 *
 * Fetches the integer from the column whose index is stored in @userdata
 * (decoded with GPOINTER_TO_UINT()) for the row at @iter, formats it with
 * "%i" and adds the resulting string to @out_terms as a single term.
 */
static void
_dee_analyze_int_func       (DeeModel     *model,
                             DeeModelIter *iter,
                             DeeTermList  *out_terms,
                             gpointer      userdata)
{
  int    number = dee_model_get_int (model, iter, GPOINTER_TO_UINT (userdata));
  gchar *text = g_strdup_printf ("%i", number);

  dee_term_list_add_term (out_terms, text);

  /* The formatted string is owned by this function and released here */
  g_free (text);
}

/*
 * Analyzer callback for unsigned integer columns.
 *
 * Fetches the unsigned integer from the column whose index is stored in
 * @userdata (decoded with GPOINTER_TO_UINT()) for the row at @iter, formats
 * it with "%u" and adds the resulting string to @out_terms as a single term.
 */
static void
_dee_analyze_uint_func      (DeeModel     *model,
                             DeeModelIter *iter,
                             DeeTermList  *out_terms,
                             gpointer      userdata)
{
  guint  number = dee_model_get_uint (model, iter, GPOINTER_TO_UINT (userdata));
  gchar *text = g_strdup_printf ("%u", number);

  dee_term_list_add_term (out_terms, text);

  /* The formatted string is owned by this function and released here */
  g_free (text);
}

/*
 * API
 */

/**
 * dee_analyzer_new_for_key_column:
 * @column: The index of the column to read strings from
 *
 * Create a #DeeAnalyzer that reads the string found in @column of a model
 * row and uses the entire string, unmodified, as one single term.
 *
 * Returns: A newly allocated #DeeAnalyzer. Do not modify it.
 *          Free with g_free().
 */
DeeAnalyzer*
dee_analyzer_new_for_key_column (guint column)
{
  DeeAnalyzer *self = g_new0 (DeeAnalyzer, 1);

  /* The column index travels to the callback packed inside the pointer */
  self->userdata = GUINT_TO_POINTER (column);
  self->analyze = _dee_analyze_key_func;

  return self;
}

/**
 * dee_analyzer_new_for_full_text_column:
 * @column: The index of the column to read full text from
 *
 * Create a #DeeAnalyzer that performs a (simple) full text analysis of the
 * textual data found in @column.
 *
 * The text is split into terms on any non-alphanumeric character. Each term
 * is then run through g_utf8_normalize() followed by g_utf8_strdown().
 *
 * Returns: A newly allocated #DeeAnalyzer. Do not modify it.
 *          Free with g_free().
 */
DeeAnalyzer*
dee_analyzer_new_for_full_text_column (guint column)
{
  DeeAnalyzer *self = g_new0 (DeeAnalyzer, 1);

  /* The column index travels to the callback packed inside the pointer */
  self->userdata = GUINT_TO_POINTER (column);
  self->analyze = _dee_analyze_full_text_func;

  return self;
}

/**
 * dee_analyzer_new_for_int_column:
 * @column: The index of the column to read integers from
 *
 * Create a #DeeAnalyzer that reads the signed integer found in @column of a
 * model row and uses its "%i" formatted string form as one single term.
 *
 * Returns: A newly allocated #DeeAnalyzer. Do not modify it.
 *          Free with g_free().
 */
DeeAnalyzer*
dee_analyzer_new_for_int_column (guint column)
{
  DeeAnalyzer *self = g_new0 (DeeAnalyzer, 1);

  /* The column index travels to the callback packed inside the pointer */
  self->userdata = GUINT_TO_POINTER (column);
  self->analyze = _dee_analyze_int_func;

  return self;
}

/**
 * dee_analyzer_new_for_uint_column:
 * @column: The index of the column to read unsigned integers from
 *
 * Create a #DeeAnalyzer that reads the unsigned integer found in @column of
 * a model row and uses its "%u" formatted string form as one single term.
 *
 * Returns: A newly allocated #DeeAnalyzer. Do not modify it.
 *          Free with g_free().
 */
DeeAnalyzer*
dee_analyzer_new_for_uint_column (guint column)
{
  DeeAnalyzer *self = g_new0 (DeeAnalyzer, 1);

  /* The column index travels to the callback packed inside the pointer */
  self->userdata = GUINT_TO_POINTER (column);
  self->analyze = _dee_analyze_uint_func;

  return self;
}
