/* vim: set noet ts=4:
 *
 * Copyright (c) 2002-2003 Martin A. Godisch <martin@godisch.de>.
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place, Suite 330, Boston, MA 02111-1307 USA.
 */
#include <data.h>
#include <freedict.h>
#include <latrine.h>
#include <lsvt2.h>
#include <memory.h>
#include <stdio.h>
#include <time.h>

/* paths of the dictionary and of the hits file, both have to be set
 * before load_wordlist() is called
 */
char *dictfile = NULL;
char *wordfile = NULL;

/* randcount: select_word() draws from this many lowest rated words
 * wordlimit: upper bound for the words kept in memory, 0 disables the bound
 */
size_t randcount = DEFAULT_RANDCOUNT;
size_t wordlimit = DEFAULT_WORDLIMIT;

/* dictcount: dictionary entries seen by the last load_wordlist()
 * wordcount: entries currently stored in wordlist
 */
static size_t dictcount = 0;
static size_t wordcount = 0;

/* the words loaded by load_wordlist()
 */
static struct word *wordlist = NULL;

/* hit rate bookkeeping maintained by load_wordlist() and update_word()
 * NOTE(review): small_out is only written in this file, never read
 */
static double small_out = 0.0;
static double large_in  = 0.0;

/* returns the number of entries currently stored in the wordlist
 */
size_t get_wordcount(void)
{
	return wordcount;
}
/* returns the number of dictionary entries counted by the last
 * load_wordlist()
 */
size_t get_dictcount(void)
{
	return dictcount;
}

/* qsort() comparator, orders struct word elements by ascending RATE()
 *
 * returns -1: a rates lower than b
 * returns  0: both rate the same
 * returns +1: a rates higher than b
 */
static int comp_by_hit(const void *a, const void *b)
{
	const double rate_a = RATE((struct word*)a);
	const double rate_b = RATE((struct word*)b);

	/* each comparison yields 0 or 1, the difference is the sign */
	return (rate_a > rate_b) - (rate_a < rate_b);
}

/* compare wordlist elements by dictionary position
 */
static int comp_by_pos(const void *a, const void *b)
{
	double p1 = ((struct word*)a)->pos;
	double p2 = ((struct word*)b)->pos;
	if (p1 < p2)
		return -1;
	else if (p1 > p2)
		return +1;
	return 0;
}

/* pick a random word among the randcount entries with the lowest hit rate
 *
 * randcount is lowered to wordcount if it exceeds it, the wordlist is
 * re-sorted by hit rate on every call
 *
 * returns struct word
 * returns NULL: failure (errmsg called)
 */
struct word *select_word(void)
{
	size_t pick;

	/* the pool cannot be larger than the list itself */
	if (wordcount < randcount)
		randcount = wordcount;
	if (randcount == 0) {
		errmsg(_("internal error: empty wordlist"));
		return NULL;
	}
	/* lowest rated words first, they form the pool */
	qsort(wordlist, wordcount, sizeof(struct word), comp_by_hit);
	pick = random() % randcount;
	return wordlist + pick;
}

/* record one answer for a struct word
 *
 * hit != 0 increments the correct counter, hit == 0 the mistaken counter;
 * if the word's rate was equal to large_in before the update, large_in
 * follows the word's new rate
 */
void update_word(struct word *w, int hit)
{
	double before;

	assert(w != NULL);
	before = RATE(w);
	if (!hit)
		w->mistaken++;
	else
		w->correct++;
	if (before == large_in)
		large_in = RATE(w);
}

/* release both strings of every entry and the list itself,
 * afterwards wordlist is NULL and wordcount is 0
 */
static inline void free_wordlist(void)
{
	size_t left = wordcount;

	/* the entries are independent, so walk them back to front */
	while (left-- > 0) {
		FREE(&wordlist[left].lang[0]);
		FREE(&wordlist[left].lang[1]);
	}
	/* free(NULL) is a no-op, no guard needed */
	free(wordlist);
	wordlist  = NULL;
	wordcount = 0;
}

/* load the dictionary and the corresponding hits file,
 * an existing wordlist will be overridden
 *
 * A dictfile name ending in ".vt2" (any case) selects the lsvt2 reader,
 * every other name the freedict reader.  With wordlimit == 0 all entries
 * are kept and the list grows in WORDSTEP increments.  Otherwise at most
 * wordlimit entries are kept; once the list is full, an entry whose RATE()
 * is below large_in replaces the entry rated large_in.  On success the list
 * is sorted by hit rate and the random generator is seeded from the clock.
 *
 * The previous list is released before loading, so struct word pointers
 * obtained earlier are invalid afterwards.
 *
 * returns  0: success
 * returns -1: failure (errmsg called, wordlist freed)
 */
int load_wordlist(void)
{
	int (*open_dict)(const char*) = NULL;
	int (*read_dict)(gzFile, struct word*) = NULL;
	int (*close_dict)(void) = NULL;
	gzFile hits = NULL;
	double q    = 0.0;
	struct word w;
	size_t cursize, namelen, i;
	time_t randinit;
	int    ret;

	assert(dictfile != NULL);
	assert(wordfile != NULL);
	/* w.lang is released unconditionally after the read loop, even if
	 * read_dict() never filled it in, so it must not start out as garbage */
	memset(&w, 0, sizeof(w));
	/* release the old list including its strings up front: this keeps the
	 * "wordlist freed" promise on every failure path and avoids leaking
	 * entries when the new list turns out shorter than the old one */
	free_wordlist();
	/* names shorter than the suffix must not be indexed before their start */
	namelen = strlen(dictfile);
	if (namelen >= 4 && strcasecmp(dictfile + namelen - 4, ".vt2") == 0) {
#ifdef HAVE_LIBMDB
		open_dict  = open_lsvt2;
		read_dict  = read_lsvt2;
		close_dict = close_lsvt2;
#else
		errmsg(_("This binary lacks MDB Tools support, please recompile."));
		return -1;
#endif
	} else {
		open_dict  = open_freedict;
		read_dict  = read_freedict;
		close_dict = close_freedict;
	}
	/* NOTE(review): MALLOC/REALLOC are assumed not to return NULL -- confirm */
	cursize  = wordlimit != 0 ? wordlimit : WORDSTEP;
	wordlist = (struct word*)MALLOC(cursize * sizeof(struct word));
	/* free_wordlist() FREEs the strings of every counted slot, keep unused
	 * slots zeroed so that no stray pointer can ever reach it */
	memset(wordlist, 0, cursize * sizeof(struct word));
	if (open_dict(dictfile) == -1) {
		free_wordlist();
		return -1;
	}
	/* gzopen() leaves errno untouched when it runs out of memory, clear it
	 * so that the errno == 0 test below is meaningful */
	errno = 0;
	if ((hits = gzopen(wordfile, "rb")) == NULL && errno != ENOENT)
		errmsg(_("cannot open wordlist: %s"), errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
	for (dictcount = 0, wordcount = 0; (ret = read_dict(hits, &w)) == 1; dictcount++) {
		if (wordcount >= cursize && wordlimit == 0) {
			assert(wordcount == cursize);
			wordlist = (struct word*)REALLOC(wordlist, (cursize += WORDSTEP) * sizeof(struct word));
			/* memset() counts bytes, not elements */
			memset(&wordlist[wordcount], 0, (cursize - wordcount) * sizeof(struct word));
		}
		q = RATE(&w);
		if (wordcount < cursize) {
			/* room left: take the word, the target slot is still empty */
			if (wordcount == 0 || q > large_in)
				large_in = q;
			wordlist[wordcount++] = w;
			/* the strings belong to the list now, do not free them via w */
			w.lang[0] = NULL;
			w.lang[1] = NULL;
			continue;
		}
		if (q < large_in) {
			/* list is full: replace the entry rated large_in */
			for (i = 0; i < cursize; i++)
				if (RATE(&wordlist[i]) == large_in)
					break;
			assert(i < cursize);
			FREE(&wordlist[i].lang[0]);
			FREE(&wordlist[i].lang[1]);
			wordlist[i] = w;
			w.lang[0] = NULL;
			w.lang[1] = NULL;
			small_out = large_in;
			large_in  = q;
			continue;
		}
		/* NOTE(review): wordcount == cursize always holds here, so
		 * small_out simply follows the last rejected rate -- confirm intent */
		if (wordcount == cursize || q < small_out)
			small_out = q;
		FREE(&w.lang[0]);
		FREE(&w.lang[1]);
	}
	/* a failed read may have left a partially filled w behind */
	FREE(&w.lang[0]);
	FREE(&w.lang[1]);
	close_dict();
	if (hits != NULL)
		gzclose(hits);
	if (ret == -1 || wordcount == 0)
		free_wordlist();
	else {
		/* give back the unused tail of the allocation */
		if (wordcount < cursize)
			wordlist = (struct word*)REALLOC(wordlist, wordcount * sizeof(struct word));
		time(&randinit);
		srandom(randinit);
		qsort(wordlist, wordcount, sizeof(struct word), comp_by_hit);
	}
	if (ret == 0 && wordcount == 0) {
		errmsg(_("invalid or empty dictionary"));
		ret = -1;
	}
	return ret;
}

/* write the hit counters of the current wordlist back to the hits file
 *
 * The new contents go to "<wordfile>.new" first, which is renamed over
 * wordfile once it has been closed successfully.  Comment and empty lines
 * of the old file are copied, unparsable lines are commented out, and
 * counter lines are replaced by the in-memory values wherever the wordlist
 * holds the word of that dictionary position.  The file is padded with
 * "0:0" lines up to dictcount entries.  The wordlist is left sorted by
 * dictionary position.
 *
 * returns  0: success (also if the wordlist is empty, nothing is written)
 * returns -1: failure (errmsg called)
 */
int save_wordlist(void)
{
	char buffer[BUFSIZE];
	char *tempfile    = NULL;
	gzFile old        = NULL;
	gzFile new        = NULL;
	unsigned correct  = 0;
	unsigned mistaken = 0;
	size_t i, n;
	int ret;

	if (wordcount == 0)
		return 0;
	/* room for wordfile, ".new" and the terminating NUL */
	tempfile = (char*)MALLOC(strlen(wordfile) + 5);
	sprintf(tempfile, "%s.new", wordfile);
	/* gzopen() leaves errno untouched when it runs out of memory, clear it
	 * so that the errno == 0 tests below are meaningful */
	errno = 0;
	if ((old = gzopen(wordfile, "rb")) == NULL && errno != ENOENT)
		errmsg(_("cannot open old wordfile: %s"), errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
	errno = 0;
	if ((new = gzopen(tempfile, "wb")) == NULL) {
		errmsg(_("cannot open new wordfile: %s"), errno == 0 ? zError(Z_MEM_ERROR) : strerror(errno));
		/* do not leak the handle of the old file */
		if (old != NULL)
			gzclose(old);
		FREE(&tempfile);
		return -1;
	}
	if (old == NULL) {
		gzprintf(new, _("# LaTrine wordlist file, do not edit while LaTrine is running!\n"));
		gzprintf(new, _("# Dictionary: %s\n\n"), dictfile);
	}
	/* the file is ordered by dictionary position, walk the list the same way */
	qsort(wordlist, wordcount, sizeof(struct word), comp_by_pos);
	i = 0;	/* next wordlist entry to be written */
	n = 0;	/* dictionary position of the next counter line */
	if (old != NULL) {
		while (gzgets(old, buffer, BUFSIZE) != Z_NULL) {
			if (*buffer == '\n' || *buffer == '#') {
				gzprintf(new, "%s", buffer);
				continue;
			}
			/* the counters are unsigned, so scan and print them as such */
			if (sscanf(buffer, "%u:%u", &correct, &mistaken) != 2) {
				gzprintf(new, "# %s", buffer);
				continue;
			}
			if (i < wordcount && wordlist[i].pos == n) {
				gzprintf(new, "%u:%u\n", (unsigned)wordlist[i].correct, (unsigned)wordlist[i].mistaken);
				i++;
			} else
				gzprintf(new, "%u:%u\n", correct, mistaken);
			n++;
		}
		gzclose(old);
	}
	/* dictionary entries the old file did not cover yet */
	for (; n < dictcount; n++)
		if (i < wordcount && wordlist[i].pos == n) {
			gzprintf(new, "%u:%u\n", (unsigned)wordlist[i].correct, (unsigned)wordlist[i].mistaken);
			i++;
		} else
			gzprintf(new, "%u:%u\n", 0u, 0u);
	if ((ret = gzclose(new)) != Z_OK) {
		/* gzclose() has released the handle even on failure, so it must
		 * not be handed to gzerror(); translate the return code instead */
		errmsg(_("cannot close wordfile: %s"), ret == Z_ERRNO ? strerror(errno) : zError(ret));
		FREE(&tempfile);
		return -1;
	}
	if (rename(tempfile, wordfile) != 0) {
		errmsg(_("cannot update wordfile: %s"), strerror(errno));
		FREE(&tempfile);
		return -1;
	}
	FREE(&tempfile);
	return 0;
}
