/*
 * Copyright (C) 2003 Joo-won Jung <sanori@sanori.net>
 * 
 *     This program is free software; you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation; either version 2 of the License, or
 *     (at your option) any later version.
 * 
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 * 
 *     You should have received a copy of the GNU General Public License
 *     along with this program; if not, write to the Free Software
 *     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 */

/*
 * This code is rewritten from mime.h, sendlib.c, rfc2047.[ch] of mutt
 * 1.4.1 (which is GPL also) by Michael R. Elkins <me@cs.hmc.edu> and
 * Edmund Grimley Evans <edmundo@rano.org>.
 * Some parts of this code are adopted from base64.c of mutt which is
 * "heavily modified" from fetchmail by Brendan Cully <brendan@kublai.com>.
 *
 * Original copyright notice of fetchmail:
 *  
 * The code in the fetchmail distribution is Copyright 1997 by Eric
 * S. Raymond.  Portions are also copyrighted by Carl Harris, 1993
 * and 1995.  Copyright retained for the purpose of protecting free
 * redistribution of source. 
 */

#include <stdlib.h>
#include <string.h>
#include "mime_coder.h"

/*
 * Standard (MIME) names of the supported transfer encodings.
 * Slot 0 is NULL (no name); presumably the table is indexed by
 * EncodingType -- verify against mime_coder.h.
 */
const char *EncodingName[] = {
	NULL,
	"quoted-printable",
	"base64"
};

/* en/de-code tables */

/* Base64 alphabet: maps a 6-bit value (0..63) to its output character. */
const char B64Chars[64] = {
	/*  0..12 */ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
	/* 13..25 */ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
	/* 26..38 */ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
	/* 39..51 */ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	/* 52..61 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	/* 62..63 */ '+', '/'
};

/* Marker stored in the decode tables for "not a valid digit". */
const int BAD = -1;

/*
 * Value of each 7-bit ASCII character when read as a hexadecimal digit
 * (both upper and lower case letters are accepted), or -1 (BAD).
 */
const int Index_hex[128] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* '0'  */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	/* '@'  */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 'P'  */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* '`'  */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 'p'  */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/*
 * Value of each 7-bit ASCII character when read as a base64 digit,
 * or -1 (BAD).  Note that the padding character '=' maps to -1.
 */
const int Index_64[128] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* ' '  */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	/* '0'  */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
	/* '@'  */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 'P'  */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	/* '`'  */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 'p'  */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
};

/*
 * Digit-value lookups in the 128-entry decode tables.  Neither macro
 * range-checks its argument: the caller must already know that 'c' lies
 * in 0..127 before using them.  The result is BAD for a non-digit.
 */
#define hexval(c)    (Index_hex[(unsigned int)(c)])
#define base64val(c) (Index_64[(unsigned int)(c)])

/*
 * From RFC-822 and RFC-2047: the line-width budget, in characters, that
 * encode_rfc2047() folds its output to and that enclen_rfc2047() uses
 * when estimating the encoded size.
 */
const size_t MAX_LINE_LEN = 76;


/*
 * Encode the 'ilen' bytes at 'in' as base64 and store the result,
 * NUL-terminated, in 'out', whose allocated space is 'olen' bytes.
 *
 * Output is produced in whole 4-character groups; a group is written only
 * while more than 4 bytes of space remain, so that the terminating NUL
 * always fits.  Input that does not fit is silently dropped.  If 'olen'
 * is 0 nothing at all is written.
 *
 * Returns: the number of characters written to 'out', not counting the
 * terminating NUL.
 */
size_t encode_b64 (char *out, size_t olen,
		const char *in, size_t ilen)
{
	/* Work on unsigned bytes: shifting a negative plain char left is
	 * undefined behaviour and shifting it right is implementation-defined. */
	const unsigned char *src = (const unsigned char *) in;
	char *out0 = out;

	if (olen == 0)
		return 0;	/* no room even for the terminating NUL */

	/* full 3-byte groups -> 4 output characters */
	while (ilen >= 3 && olen > 4)
	{
		*out++ = B64Chars[src[0] >> 2];
		*out++ = B64Chars[((src[0] & 0x03) << 4) | (src[1] >> 4)];
		*out++ = B64Chars[((src[1] & 0x0f) << 2) | (src[2] >> 6)];
		*out++ = B64Chars[src[2] & 0x3f];
		src  += 3, ilen -= 3;
		olen -= 4;
	}

	/* trailing 1 or 2 bytes -> one padded group */
	if (ilen > 0 && olen > 4)
	{
		unsigned int fragment = (src[0] & 0x03) << 4;

		*out++ = B64Chars[src[0] >> 2];
		if (ilen > 1)
			fragment |= src[1] >> 4;
		*out++ = B64Chars[fragment];
		*out++ = (ilen < 2) ? '=' : B64Chars[(src[1] & 0x0f) << 2];
		*out++ = '=';
	}
	*out = '\0';

	return out - out0;
}

/*
 * Decode the base64 text 'in' (at most 'ilen' characters, also stopping
 * at a NUL) into the byte array 'out', which has room for 'olen' bytes.
 * The output is NOT NUL-terminated.
 *
 * The input is consumed four characters at a time.  Decoding stops when
 * the output is full, when the input is exhausted, or after a group that
 * contains '=' padding.
 *
 * Returns: the number of bytes decoded (<= olen), or -1 converted to
 * size_t if the input contains a character outside the base64 alphabet
 * or ends in an incomplete group.
 */
size_t _decode_b64(char *out, size_t olen, const char *in, size_t ilen)
{
	const size_t capacity = olen;

	while (olen && ilen && *in) {
		unsigned char quad[4];
		int k;

		if (ilen < 4)
			return -1;	/* incomplete group */

		/* Validate left to right and bail out at the first bad character,
		 * so that nothing behind it is ever read.  Padding ('=') is only
		 * acceptable in the last two positions. */
		for (k = 0; k < 4; k++) {
			quad[k] = in[k];
			if (quad[k] > 127)
				return -1;
			if (base64val(quad[k]) == BAD && !(k >= 2 && quad[k] == '='))
				return -1;
		}
		in += 4;
		ilen -= 4;

		/* first byte: 6 bits of digit 0 + top 2 bits of digit 1 */
		*out++ = (base64val(quad[0]) << 2) | (base64val(quad[1]) >> 4);
		if (--olen == 0)
			break;
		if (quad[2] == '=')
			break;

		/* second byte: low 4 bits of digit 1 + top 4 bits of digit 2 */
		*out++ = ((base64val(quad[1]) << 4) & 0xf0) | (base64val(quad[2]) >> 2);
		if (--olen == 0)
			break;
		if (quad[3] == '=')
			break;

		/* third byte: low 2 bits of digit 2 + all of digit 3 */
		*out++ = ((base64val(quad[2]) << 6) & 0xc0) | base64val(quad[3]);
		if (--olen == 0)
			break;
	}

	return capacity - olen;
}

/* Characters that are always written as "=XX" escapes by _encode_qp(). */
const char MimeSpecials[] = "@.,;:<>[]\\\"()?/= \t";

/*
 * Encode the 'ilen' bytes at 'in' as quoted-printable and store the
 * result, NUL-terminated, in 'out', whose allocated space is 'olen' bytes.
 *
 * A byte is written as "=XX" (upper-case hex) when it is >= 0x7f,
 * <= 0x20, an underscore, or listed in MimeSpecials; every other byte is
 * copied unchanged.  If 'rfc2047' is non-zero a space is written as '_'
 * instead of "=20".
 *
 * Encoding stops as soon as the next character plus the terminating NUL
 * would no longer fit; the remaining input is silently dropped.  If
 * 'olen' is 0 nothing at all is written.
 *
 * Returns: the number of characters written to 'out', not counting the
 * terminating NUL.
 */
size_t _encode_qp (char *out, size_t olen, const char *in, size_t ilen,
		int rfc2047)
{
	static const char hex[] = "0123456789ABCDEF";
	char *out0 = out;

	if (olen == 0)
		return 0;	/* no room even for the terminating NUL */

	while (ilen > 0)
	{
		unsigned char c = (unsigned char) *in;
		int underscore = (rfc2047 && c == ' ');
		int escape = !underscore
			&& (c >= 0x7f || c <= 0x20 || c == '_'
				|| strchr(MimeSpecials, c) != NULL);
		size_t need = escape ? 3 : 1;

		/* always keep one byte in reserve for the NUL */
		if (olen <= need)
			break;

		if (underscore)
			*out++ = '_';
		else if (escape)
		{
			*out++ = '=';
			*out++ = hex[(c & 0xf0) >> 4];
			*out++ = hex[c & 0x0f];
		}
		else
			*out++ = c;

		olen -= need;
		in++, ilen--;
	}
	*out = '\0';

	return out - out0;
}

/*
 * Decode the quoted-printable text 'in' (at most 'ilen' characters, also
 * stopping at a NUL) into 'out', which has room for 'olen' bytes.
 *
 * "=XX" (two hex digits, either case) is decoded to the byte XX; if
 * 'rfc2047' is non-zero, '_' is decoded to a space; every other character
 * is copied unchanged.  Soft line breaks ("=" followed by a newline) are
 * not supported and are reported as an error.
 *
 * The result is NUL-terminated only if there is room left after the
 * decoded bytes; when exactly 'olen' bytes were produced no terminator is
 * written, so the buffer is never overrun.
 *
 * Returns: the number of bytes decoded (<= olen), or -1 converted to
 * size_t on a malformed or truncated "=XX" escape.
 */
size_t _decode_qp(char *out, size_t olen, const char *in, size_t ilen,
		int rfc2047)
{
	size_t olen0 = olen;

	while (olen && ilen && *in)
	{
		if (rfc2047 && *in == '_')
		{
			*out++ = ' ';
		}
		else if (*in == '=')
		{
			unsigned char hi, lo;

			/* the escape must lie completely inside the input */
			if (ilen < 3)
				return (size_t) -1;

			/* check each digit before looking at the next one, so that
			 * nothing behind a terminating NUL is read */
			hi = (unsigned char) in[1];
			if (hi > 127 || hexval(hi) == BAD)
				return (size_t) -1;
			lo = (unsigned char) in[2];
			if (lo > 127 || hexval(lo) == BAD)
				return (size_t) -1;

			*out++ = (char) ((hexval(hi) << 4) | hexval(lo));
			in += 2, ilen -= 2;
		}
		else
		{
			*out++ = *in;
		}
		in++, ilen--;
		olen--;
	}

	/* terminate only when a spare byte is available */
	if (olen > 0)
		*out = '\0';
	return olen0 - olen;
}

/*
 * Find the first RFC 2047 encoded word in the NUL-terminated string 'in'.
 * An RFC 2047 (+ RFC 2231) encoded word has the form:
 *
 *		=?'charset'(*'language')?{B|Q}?'encoded_content'?=
 *
 * The RFC 2231 language suffix is skipped and not reported.
 *
 * charset:    receives the NUL-terminated charset name.  Up to CHARSET_LEN
 *             characters plus the terminator are stored, so the buffer
 *             must hold at least CHARSET_LEN + 1 bytes.  It is overwritten
 *             for every "=?" candidate examined, including rejected ones.
 * enc:        set to ENC_B64 or ENC_QP according to the encoding letter.
 * start, end: set to the first character of the encoded content and to
 *             the '?' of the closing "?=" respectively.
 *
 * The output arguments are only meaningful when a non-NULL value is
 * returned.
 *
 * Return value: the beginning of the encoded word (its "=?"), or NULL if
 * 'in' contains no well-formed encoded word.
 */
static const char *
find_encoded_word(char *charset, EncodingType *enc, const char **start,
		const char **end, const char *in) {
	const char *p, *q;

	q = in;
	while ((p = strstr (q, "=?")) != NULL) {
		int i;

		/* charset: printable ASCII up to the first tspecial */
		for (q = p + 2, i = 0; 0x20 < *q && *q < 0x7f && (i < CHARSET_LEN)
				&& !strchr ("*()<>@,;:\"/[]?.=", *q); q++, i++)
			charset[i] = *q;
		charset[i] = '\0';

		if (*q == '*') {	/* ignore RFC2231 */
			for (; 0x20 < *q && *q < 0x7f
				&& !strchr ("()<>@,;:\"/[]?.=", *q); q++);
		}

		/* expect "?X?"; make sure the string does not end right after
		 * the first '?' before looking at q[2] */
		if (q[0] != '?' || q[1] == '\0' || q[2] != '?')
			continue;	/* something's wrong */

		if (q[1] == 'B' || q[1] == 'b') {
			*enc = ENC_B64;
		} else if (q[1] == 'Q' || q[1] == 'q') {
			*enc = ENC_QP;
		} else continue;

		*start = q + 3; /* start of encoded content */

		for (q = q + 3; 0x20 < *q && *q < 0x7f && *q != '?'; q++) ;
		if (q[0] != '?' || q[1] != '=') {
			/* step back one character so that an "=?" beginning
			 * just before q is still found by the next search */
			--q;
			continue;
		}
		*end = q;
		return p;
	}
	return NULL;
}

/*
 * Decode the RFC-2047 (or RFC-2231) encoded, NUL-terminated string 'in'
 * and store the result in 'out', whose size is 'olen' bytes.
 *
 * Text outside encoded words is copied as it is, except that white space
 * between an already decoded word and the next encoded word is removed.
 * An encoded word whose content cannot be decoded is copied in its raw
 * form.  The result is always NUL-terminated (one byte of 'olen' is
 * reserved for the terminator) and is truncated if it does not fit.
 *
 * 'charset' receives the charset name of the encoded word scanned last;
 * it is filled in by find_encoded_word() and must have room for
 * CHARSET_LEN + 1 bytes.  The language identifier of RFC-2231 is ignored.
 *
 * Returns: the number of bytes stored in 'out', not counting the
 * terminating NUL.
 */
size_t decode_rfc2047(char *out, size_t olen, char *charset, const char *in)
{
	const char *out0 = out;
	int found_encoded = 0;

	if (olen == 0)
		return 0;
	olen--;		/* keep one byte for the terminating NUL */

	while (*in && olen > 0) {
		const char *p, *start, *end;
		EncodingType enc = ENC_NONE;
		size_t n, len;
		char *buf;

		p = find_encoded_word(charset, &enc, &start, &end, in);
		if (p == NULL) {
			/* no more encoded word: copy the rest verbatim */
			n = strlen(in);
			if (n > olen)
				n = olen;
			memcpy (out, in, n);
			out += n;
			olen -= n;
			break;
		}

		if (p != in) {
			n = (size_t) (p - in);
			/* remove spaces between the encoded words */
			if (!found_encoded || strspn(in, " \t\r\n") != n)
			{
				/* otherwise, copy the string as it is */
				if (n > olen)
					n = olen;
				memcpy (out, in, n);
				out += n;
				olen -= n;
			}
		}

		/* decode into a scratch buffer; the decoded text is never longer
		 * than the encoded text, and one extra byte leaves room for the
		 * NUL that _decode_qp() appends */
		len = (size_t) (end - start);
		n = (size_t) -1;
		buf = malloc(len + 1);
		if (buf != NULL) {
			if (enc == ENC_B64) {
				n = _decode_b64(buf, len, start, len);
			} else if (enc == ENC_QP) {
				n = _decode_qp(buf, len, start, len, 1);
			}
		}

		if (n != (size_t) -1) {
			if (n > olen)
				n = olen;
			memcpy (out, buf, n);
			out += n;
			olen -= n;
			found_encoded = 1;
		} else {
			/* decoding (or allocation) failed. copy the raw content,
			 * including the leading "=?" and the trailing "?=" */
			n = (size_t) (end - p) + 2;
			if (n > olen)
				n = olen;
			memcpy (out, p, n);
			out += n;
			olen -= n;
		}
		free(buf);
		in = end + 2;
	}
	*out = '\0';
	return out - out0;
}

/*
 * Encode the 'ilen' bytes at 'in', which are text in the character set
 * 'charset', as a sequence of RFC-2047 encoded words using the encoding
 * 'enc' (ENC_B64 or ENC_QP), and store the NUL-terminated result in
 * 'out', whose size is 'olen' bytes.
 *
 * The encoded words are separated by "\n " (a folded header line).
 * 'header_len' is the number of columns already used on the first line;
 * it only shortens the first encoded word.  Every encoded word carries at
 * least one unit of input, even if that exceeds the line budget, so the
 * function always makes progress.
 *
 * If the output buffer is too small the result is silently truncated.
 *
 * NOTE(review): the input is split at byte positions, so a multi-byte
 * character may be divided between two encoded words -- confirm whether
 * callers need character-aware splitting.
 * NOTE(review): continuation lines use a budget of MAX_LINE_LEN - 1
 * without reserving the closing "?="; such lines look like they can be up
 * to two columns longer than the first one -- confirm against RFC 2047
 * before changing, since enclen_rfc2047() is tied to this layout.
 *
 * Returns: the number of characters stored in 'out' (not counting the
 * terminating NUL), or -1 converted to size_t if 'enc' is not a supported
 * encoding or memory could not be allocated.
 */
size_t encode_rfc2047(char *out, size_t olen,
		const char *in, size_t ilen,
		const char *charset, const EncodingType enc, size_t header_len)
{
	char *preamble, *p;
	size_t clen = strlen(charset);
	char *out0 = out;
	size_t linelen, plen;

	if (enc != ENC_B64 && enc != ENC_QP)
		return (size_t) -1;
	if (olen == 0)
		return 0;	/* no room even for the terminating NUL */

	/* make preamble: "=?" charset "?B?" (or "?Q?") and a NUL */
	preamble = malloc(clen + 6);
	if (preamble == NULL)
		return (size_t) -1;
	p = preamble;

	memcpy (p, "=?", 2), p += 2;
	memcpy (p, charset, clen), p += clen;
	memcpy (p, (enc == ENC_B64) ? "?B?" : "?Q?", 3), p += 3;
	*p = '\0';
	plen = (size_t) (p - preamble);

#define min(x, y) (((x)<(y))?(x):(y))
	/* columns left on the first line after the header and the closing
	 * "?="; clamped at 0 instead of wrapping around */
	linelen = (header_len < MAX_LINE_LEN - 2)
		? MAX_LINE_LEN - 2 - header_len : 0;

	while (olen > plen+3 && ilen > 0) {
		size_t li, lo;

		memcpy(out, preamble, plen), out += plen, olen -= plen;
		linelen = (linelen > plen) ? linelen - plen : 0;

		if (enc == ENC_B64) {
			li = (linelen/4)*3;
			if (li == 0)
				li = 3;		/* never emit an empty encoded word */
			li = min(li, ilen);
			/* olen > 3 here; encode_b64 leaves room for the 4-byte
			 * trailer below because it keeps one byte for its NUL */
			lo = encode_b64(out, olen-3, in, li);
			ilen -= li;
			in += li;
			out += lo;
			olen -= lo;
		} else { /* ENC_QP */
			if (linelen < 4)
				linelen = 4;	/* room for at least one input byte */
			/* olen > 4 keeps the 4-byte trailer below inside the buffer
			 * even if _encode_qp uses up all the space it was given */
			while (linelen > 3 && ilen > 0 && olen > 4) {
				li = min(linelen/3, ilen);
				lo = _encode_qp(out, olen-4, in, li, 1);
				ilen -= li;
				in += li;
				out += lo;
				olen -= lo;
				linelen -= lo;
			}
		}
		/* at least 4 bytes are left at this point */
		memcpy(out, "?=\n ", 4), out += 4, olen -= 4;

		linelen = MAX_LINE_LEN - 1;
	}

	/* drop the "\n " that follows the last encoded word, if any */
	if (out != out0)
		out -= 2;
	*out = '\0';

	free(preamble);
	return out - out0;
}

/*
 * Return an upper bound for the number of bytes needed to encode a string
 * of 'ilen' bytes with the RFC-2047 encoding 'enc' and a charset name of
 * 'charset_len' characters, or -1 converted to size_t if 'enc' is not a
 * supported encoding.
 *
 * The estimate is deliberately generous: it assumes that every line
 * carries up to 3 columns less payload than the nominal maximum (the
 * encoder works in units of 3 or 4 characters) and adds two extra lines,
 * which covers both a short first line after a header of 'header_len'
 * columns and a partially filled last line.
 */
size_t enclen_rfc2047(size_t ilen,
		size_t charset_len, EncodingType enc, size_t header_len) {
	/* fixed overhead of one encoded word: =?charset?B??= */
	size_t chunk_len = charset_len + 7;
	size_t enclen, per_line, lines;

	/* header_len can only shorten the first line, which the two spare
	 * lines below already account for */
	(void) header_len;

	if (enc == ENC_B64) {
		enclen = enclen_b64(ilen);
	} else if (enc == ENC_QP) {
		enclen = enclen_qp(ilen);
	} else {
		return (size_t) -1; /* something is wrong */
	}

	/* minimum payload per line; never 0, so the division below is safe
	 * even for very long charset names */
	per_line = (chunk_len + 3 < MAX_LINE_LEN)
		? MAX_LINE_LEN - chunk_len - 3 : 1;
	lines = enclen / per_line + 2;

	/* each line costs the fixed overhead plus the "\n " separator */
	return enclen + chunk_len + lines * (chunk_len + 2);
}

