/* $Cambridge: hermes/src/prayer/session/rfc1522.c,v 1.3 2008/09/16 09:59:58 dpc22 Exp $ */
/************************************************
 *    Prayer - a Webmail Interface              *
 ************************************************/

/* Copyright (c) University of Cambridge 2000 - 2008 */
/* See the file NOTICE for conditions of use and distribution. */

#include "prayer_session.h"

/* RFC1522 routines stolen directly from Pine:
 *   mostly from pine/strings.c
 *   match_escapes comes from pine/mailview.c
 */

/* Some support routines and macros */

/* Wraps prototype argument lists; here it expands to its argument unchanged. */
#define PROTO(x) x
/* Charset label handed to utf8_print() for text that carries no label of
 * its own (8-bit characters in such text are assumed to be Latin-1). */
#define VAR_CHAR_SET "ISO-8859-1"
#define SPACE ' '
/* ASCII ESC (octal 033); the byte that precedes the known_escapes tails. */
#define ESCAPE '\033'
/* Charset name substituted when none is supplied. */
#define UNKNOWN_CHARSET "Unknown"

/*
 * Hex conversion aids
 *
 * HEX_CHAR1(C) / HEX_CHAR2(C) yield the upper-case hex digit for the high /
 * low nibble of C.
 * XDIGIT2C(C) maps one hex digit character ('0'-'9', 'A'-'F', 'a'-'f') to
 * its value 0..15; no validation is done on other characters.
 * X2C(S) combines the two hex digit characters at S into one value.
 * C2XPAIR(C, S) stores the two hex digits of C at S and advances S by two.
 *
 * These macros evaluate their arguments more than once: pass only
 * expressions without side effects.
 */
#define HEX_ARRAY	"0123456789ABCDEF"
#define	HEX_CHAR1(C)	(HEX_ARRAY[((C) & 0xf0) >> 4])
#define	HEX_CHAR2(C)	(HEX_ARRAY[(C) & 0xf])

#define	XDIGIT2C(C)	((C) - (isdigit((unsigned char) (C)) \
			  ? '0' : (isupper((unsigned char)(C))? '7' : 'W')))
#define	X2C(S)		((XDIGIT2C(*(S)) << 4) | XDIGIT2C(*((S)+1)))
/* do/while(0) makes the expansion a single statement, so the macro is
 * safe as the body of an unbraced if/else. */
#define	C2XPAIR(C, S)	do { \
			    *(S)++ = HEX_CHAR1(C); \
			    *(S)++ = HEX_CHAR2(C); \
			} while (0)

/*------------------------------------------------------------------
   This list of known escape sequences is taken from RFCs 1468 and 1554
   and draft-apng-cc-encoding, and the X11R5 source with only a remote
   understanding of what this all means...

   NOTE: if the length of these should extend beyond 4 chars, fix
	 MAX_ESC_LEN in filter.c
  ----*/
/* Escape-sequence tails (the bytes that follow ESC) recognised by
 * match_escapes().  The table is terminated by a NULL entry. */
static char *known_escapes[] = {
    /* RFC 1468 */
    "(B", "(J", "$@", "$B",
    /* added by RFC 1554 */
    "$A", "$(C", "$(D", ".A", ".F",
    /* those in apng-draft */
    "$)C", "$)A", "$*E", "$*X",
    "$+G", "$+H", "$+I", "$+J", "$+K", "$+L", "$+M",
    /* codes from X11R5 source */
    ")I", "-A", "-B", "-C", "-D",
    "-F", "-G", "-H", "-L", "-M",
    "-$(A", "$(B", "$)B", "$)D",
    NULL
};

/*
 * match_escapes - test whether esc_seq starts with a known escape tail
 *
 * Table entries are tried in order and the first one that is a prefix of
 * esc_seq wins.
 *
 * Returns: length of the matching entry plus one (presumably so the count
 *	    also covers the ESC byte in front of esc_seq), or 0 when no
 *	    entry matches.
 */
static int match_escapes(char *esc_seq)
{
    char **entry;

    for (entry = known_escapes; *entry != NULL; entry++) {
        int entry_len = strlen(*entry);

        if (strncmp(esc_seq, *entry, entry_len) == 0)
            return (entry_len + 1);
    }

    return (0);
}

/*
 * sstrcpy - copy string s (terminating NUL included) to *d and leave *d
 *	     pointing at the NUL just written, ready for a further append.
 *	     No bounds checking is done on the destination.
 */
static void sstrcpy(char **d, char *s)
{
    char *out = *d;

    for (; (*out = *s) != '\0'; out++, s++)
        ;

    *d = out;
}

/* ====================================================================== */

/*----------------------------------------------------------------------
  sstrncpy - append at most n characters of s at *d, moving *d along

  *d is advanced past every non-NUL character copied, which avoids a
  second pass over the destination when appending repeatedly
  (i.e., strcpy(t, x); t += strlen(t)).

  If the NUL of s is reached within n characters it is stored and *d is
  left pointing at it.  If the limit of n characters stops the copy
  first, nothing is stored at the final *d, so the destination is NOT
  NUL-terminated by this routine.  Nothing happens when n <= 0.
 ----*/


static void sstrncpy(char **d, char *s, int n)
{
    for (; n > 0; n--) {
        char ch = *s++;

        **d = ch;
        if (ch == '\0')
            return;

        (*d)++;
    }
}

/*
 * strindex - find the first character in buffer that equals ch
 *
 * The terminating NUL takes part in the search, so ch == 0 yields a
 * pointer to the end of the string.
 *
 * Returns: pointer to the matching character, or NULL if there is none.
 */
static char *strindex(char *buffer, int ch)
{
    char *cursor;

    for (cursor = buffer;; cursor++) {
        if (*cursor == ch)
            return (cursor);

        if (*cursor == '\0')
            return (NULL);
    }
}

/*
 *  * * * * * * * *      RFC 1522 support routines      * * * * * * * *
 *
 *   RFC 1522 support is *very* loosely based on code contributed
 *   by Lars-Erik Johansson <lej@cdg.chalmers.se>.  Thanks to Lars-Erik,
 *   and apologies for taking such liberties with his code.
 */


/*
 * Pieces of an encoded-word:   =?charset?encoding?encoded-text?=
 */
#define	RFC1522_INIT	"=?"
#define	RFC1522_INIT_L	2
#define RFC1522_TERM	"?="
#define	RFC1522_TERM_L	2
#define	RFC1522_DLIM	"?"
#define	RFC1522_DLIM_L	1
/* length budget used when cutting text into encoded-words */
#define	RFC1522_MAXW	75
/* characters rejected by rfc1522_valtok() */
#define	ESPECIALS	"()<>@,;:\"/[]?.="
/*
 * Characters of an encoded-word that are not encoded text: intro,
 * terminator, two delimiters, the charset name S and one more character.
 * Deliberately has no trailing semicolon so it can be used inside a
 * larger expression.
 */
#define	RFC1522_OVERHEAD(S)	(RFC1522_INIT_L + RFC1522_TERM_L +	\
				 (2 * RFC1522_DLIM_L) + strlen(S) + 1)
/* true if C has to be written as =XX in 'Q' encoded text */
#define	RFC1522_ENC_CHAR(C)	(((C) & 0x80) || !rfc1522_valtok(C)	\
				 || (C) == '_' )


/* Forward declarations for routines defined further down in this file. */

/* token scanner and the two character-class predicates it is used with */
int rfc1522_token PROTO((char *, int (*)PROTO((int)), char *, char **));
int rfc1522_valtok PROTO((int));
int rfc1522_valenc PROTO((int));
/* encoded-word validator: splits out charset, encoding, text and end */
int rfc1522_valid PROTO((char *, char **, char **, char **, char **));
/* 'Q' and 'B' encoders; both return a buffer obtained from fs_get() */
char *rfc1522_8bit PROTO((void *, int));
char *rfc1522_binary PROTO((void *, int));
/* picks how much source text goes into the next encoded-word */
unsigned char *rfc1522_encoded_word PROTO((unsigned char *, int, char *));


/*
 * rfc1522_decode - decode the given source string ala RFC 2047 (nee 1522),
 *		    IF NECESSARY, into the given destination buffer.
 *		    Don't bother copying if it turns out decoding
 *		    isn't necessary.
 *
 * d       - destination buffer; must have room for at least one byte
 * len     - size of d in bytes
 * s       - NUL-terminated source text (may be NULL).  The text is
 *	     modified in place while an encoded-word is being processed
 *	     and put back afterwards, so it must be writable.
 * charset - optional; when non-NULL, *charset receives a cpystr() copy of
 *	     the charset named by the first encoded-word that was decoded,
 *	     or NULL if there was none.
 *	     NOTE(review): presumably the caller releases that copy.
 *
 * Text outside encoded-words is handed to utf8_print() labelled as
 * VAR_CHAR_SET, as is the whole string when it holds 8-bit characters but
 * no encoded-word.  Whitespace-only text in front of an encoded-word is
 * dropped.
 *
 * NOTE(review): utf8_print() is defined elsewhere; this routine assumes it
 * advances the destination pointer past what it wrote and takes the
 * number of source bytes to convert as its last argument.
 *
 * Returns: pointer to either the destination buffer containing the
 *	    decoded text, or a pointer to the source buffer if there was
 *	    no reason to decode it (NULL when s is NULL).
 */
unsigned char *rfc1522_decode(unsigned char *d, size_t len, char *s,
                              char **charset)
{
    unsigned char *rv = NULL, *p;
    char *start = s, *sw, *cset, *enc, *txt, *ew, **q, *lang;
    unsigned long l;
    unsigned char *src;
    int i;

    *d = '\0';                  /* init destination */
    if (charset)
        *charset = NULL;

    if (!s)                     /* nothing to look at */
        return NULL;

    while (s && (sw = strstr(s, RFC1522_INIT))) {
        /* validate the rest of the encoded-word */
        if (rfc1522_valid(sw, &cset, &enc, &txt, &ew)) {
            if (!rv) {
                rv = d;         /* remember start of dest */

                /*
                 * Until now s has only been moved forward to step over
                 * bogus intros.  Rewind it so that text is copied to
                 * the destination too instead of being lost.
                 */
                s = start;
            }

            /* copy everything between s and sw to destination */
            for (i = 0; &s[i] < sw; i++)
                if (!isspace((unsigned char) s[i])) {   /* if some non-whitespace */
                    /* Assume that any 8 bit characters are Latin-1 */
                    utf8_print(VAR_CHAR_SET, NULL,
                               &d, len - (d - rv) - 1,
                               (unsigned char **) &s, sw - s);
                    break;
                }

            enc[-1] = txt[-1] = ew[0] = '\0';   /* tie off token strings */

            /* split off an optional "*language" suffix of the charset */
            if ((lang = strchr(cset, '*')))
                *lang++ = '\0';

            if (!*cset) {
                cset = UNKNOWN_CHARSET;
            }
            if (charset) {
                if (!*charset)  /* only write first charset */
                    *charset = cpystr(cset);
            }

            /* based on encoding, write the encoded text to output buffer */
            switch (*enc) {
            case 'Q':          /* 'Q' encoding */
            case 'q':
                /* special hocus-pocus to deal with '_' exception, too bad */
                for (l = 0L, i = 0; txt[l]; l++)
                    if (txt[l] == '_')
                        i++;

                if (i) {
                    /* remember where each '_' was so it can be put back */
                    q = (char **) fs_get((i + 1) * sizeof(char *));
                    for (l = 0L, i = 0; txt[l]; l++)
                        if (txt[l] == '_') {
                            q[i++] = &txt[l];
                            txt[l] = SPACE;
                        }

                    q[i] = NULL;
                } else
                    q = NULL;

                src = p = rfc822_qprint((unsigned char *) txt, strlen(txt),
                                        &l);

                /*
                 * Put the underscores back straight away, whether or not
                 * the decode worked: the decoded text lives in its own
                 * buffer, and the failure path below prints txt as is.
                 */
                if (q) {
                    for (i = 0; q[i]; i++)
                        *(q[i]) = '_';

                    fs_give((void **) &q);
                }

                if (!p)
                    goto bogus;

                /* src moves along, p stays put so the buffer can be freed */
                utf8_print(cset, VAR_CHAR_SET,
                           &d, len - 1 - (d - rv),
                           &src, l);
                *d = '\0';
                fs_give((void **) &p);  /* free decoded buf */
                break;

            case 'B':          /* 'B' encoding */
            case 'b':
                if ((src = p =
                     rfc822_base64((unsigned char *) txt, strlen(txt),
                                   &l))) {
                    utf8_print(cset, VAR_CHAR_SET,
                               &d, len - 1 - (d - rv),
                               &src, l);
                    *d = '\0';
                    fs_give((void **) &p);      /* free decoded buf */
                } else
                    goto bogus;

                break;

            bogus:
            default:
                /* unknown encoding or undecodable text: pass it through */
                src = (unsigned char *) txt;
                utf8_print(VAR_CHAR_SET, NULL,
                           &d, len - 1 - (d - rv),
                           (unsigned char **) &src, ew - txt);
                *d = '\0';
                break;
            }

            /* restore trompled source string */
            enc[-1] = txt[-1] = '?';
            ew[0] = RFC1522_TERM[0];

            /* advance s to start of text after encoded-word */
            s = ew + RFC1522_TERM_L;

            if (lang)
                lang[-1] = '*';

        } else {

            /*
             * Found intro, but bogus data followed, treat it as normal text.
             */

            /* if already copying to destn, copy it */
            if (rv) {
                /* first character after the intro: where scanning resumes */
                char *resume = sw + RFC1522_INIT_L;

                utf8_print(VAR_CHAR_SET, NULL,
                           &d, len - 1 - (d - rv),
                           (unsigned char **) &s, resume - s);
                *d = '\0';

                /*
                 * Set s explicitly rather than by an offset, so the next
                 * strstr() cannot find the same intro again.
                 */
                s = resume;
            } else
                s = sw + RFC1522_INIT_L;
        }
    }

    if (rv && *s) {             /* copy remaining text */
        utf8_print(VAR_CHAR_SET, NULL,
                   &d, len - 1 - (d - rv),
                   (unsigned char **) &s, strlen(s));
        *d = '\0';
    }

/* BUG: MUST do code page mapping under DOS after decoding */

    if (rv)
        return rv;

    /*
     * No encoded-word was decoded.  The text still has to go through
     * utf8_print() if it holds any 8-bit character.
     */
    for (s = start; *s; s++) {
        if (*s & 0x80) {
            rv = d;
            /* convert the whole string, not just the part from s on */
            utf8_print(VAR_CHAR_SET, NULL,
                       &d, len - 1,
                       (unsigned char **) &start, strlen(start));
            *d = '\0';
            return rv;
        }
    }

    return (unsigned char *) start;
}


/*
 * rfc1522_token - scan s for end_str, checking every character that
 *		   comes before it with the given predicate.
 *
 * s       - text to scan
 * valid   - predicate; a zero result for a character stops the scan
 * end_str - string that ends the token
 * endp    - on success *endp is set to the start of end_str within s;
 *	     it is left untouched on failure
 *
 * Returns: T if end_str was found before any invalid character and
 *	    before the end of s, NIL otherwise
 */
int rfc1522_token(char *s, int (*valid) (int), char *end_str, char **endp)
{
    for (; *s != '\0'; s++) {
        /* first character matches: compare the rest of end_str, if any */
        if (*s == end_str[0]
            && (end_str[1] == '\0'
                || strncmp(s + 1, end_str + 1, strlen(end_str + 1)) == 0)) {
            *endp = s;
            return (T);
        }

        if (!valid(*s))         /* character not allowed in this token */
            break;
    }

    return (NIL);
}


/*
 * rfc1522_valtok - test for valid character in the RFC 1522 encoded
 *		    word's charset and encoding fields.
 *
 * A character is rejected if it is a space, if it is a control character
 * once its high bit has been masked off, or if it is one of ESPECIALS.
 *
 * Returns: 1 if c is acceptable, 0 otherwise
 */
int rfc1522_valtok(int c)
{
    if (c == SPACE)
        return (0);

    if (iscntrl(c & 0x7f))
        return (0);

    if (strindex(ESPECIALS, c))
        return (0);

    return (1);
}


/*
 * rfc1522_valenc - test for valid character in the RFC 1522 encoded
 *		    word's encoded-text field.
 *
 * Any printable character other than '?' is accepted; note that a space
 * is accepted as well.
 *
 * Returns: 1 if c is acceptable, 0 otherwise
 */
int rfc1522_valenc(int c)
{
    if (c == '?')
        return (0);

    return (isprint((unsigned char) c) ? 1 : 0);
}


/*
 * rfc1522_valid - validate the given string as to its rfc1522-ness
 *
 * s must point at the "=?" intro of a candidate encoded-word; the intro
 * itself is stepped over without being checked.  The charset and encoding
 * fields are checked with rfc1522_valtok and the encoded text with
 * rfc1522_valenc.  The RFC1522_MAXW length limit is not enforced here.
 *
 * charset, enc, txt, endp - optional (may be NULL) output pointers.  On
 *	success they receive the start of the charset, of the encoding, of
 *	the encoded text, and of the closing "?=" respectively.  On failure
 *	the ones belonging to parts that were never reached are set to NULL
 *	and must not be used.
 *
 * Returns: non-zero if s starts a well-formed encoded-word, 0 otherwise
 */
int rfc1522_valid(char *s, char **charset, char **enc, char **txt,
                  char **endp)
{
    char *c = s + RFC1522_INIT_L;
    /* start out as NULL so a failed scan never hands back garbage */
    char *e = NULL, *t = NULL, *p = NULL;
    int rv;

    /* each ++ steps over the "?" delimiter found by the previous scan */
    rv = rfc1522_token(c, rfc1522_valtok, RFC1522_DLIM, &e)
        && rfc1522_token(++e, rfc1522_valtok, RFC1522_DLIM, &t)
        && rfc1522_token(++t, rfc1522_valenc, RFC1522_TERM, &p);

    if (charset)
        *charset = c;

    if (enc)
        *enc = e;

    if (txt)
        *txt = t;

    if (endp)
        *endp = p;

    return (rv);
}


/*
 * rfc1522_encode - encode the given source string ala RFC 1522,
 *		    IF NECESSARY, into the given destination buffer.
 *		    Don't bother copying if it turns out encoding
 *		    isn't necessary.
 *
 * d       - destination buffer
 * len     - size of d in bytes; output that does not fit is cut short
 * s       - NUL-terminated source text (may be NULL)
 * charset - charset name to put in the encoded-words; UNKNOWN_CHARSET is
 *	     used when NULL
 *
 * Encoding is needed when s holds a character with the high bit set or a
 * known escape sequence; text that already is a valid encoded-word is
 * stepped over.  'B' encoding is chosen when more than two thirds of the
 * characters need encoding, 'Q' otherwise.  The text is emitted as a
 * series of encoded-words separated by CRLF and a space.
 *
 * Returns: pointer to either the destination buffer containing the
 *	    encoded text, or a pointer to the source buffer if we didn't
 *          have to encode anything.
 */
char *rfc1522_encode(char *d, size_t len, unsigned char *s, char *charset)
{
    unsigned char *p, *q;
    int n;

    if (!s)
        return ((char *) s);

    if (!charset)
        charset = UNKNOWN_CHARSET;

    /* look for a reason to encode */
    for (p = s, n = 0; *p; p++)
        if ((*p) & 0x80) {
            n++;
        } else if (*p == RFC1522_INIT[0]
                   && !strncmp((char *) p, RFC1522_INIT, RFC1522_INIT_L)) {
            if (rfc1522_valid((char *) p, NULL, NULL, NULL, (char **) &q))
                p = q + RFC1522_TERM_L - 1;     /* advance past encoded gunk */
        } else if ((*p == ESCAPE && match_escapes((char *) (p + 1)))) {
            n++;
        }

    if (n) {                    /* found, encoding to do */
        char *rv = d, *t, enc = (n > (2 * (p - s)) / 3) ? 'B' : 'Q';
        /* "=?" + charset + "?" + encoding letter + "?" */
        size_t hdr_len = RFC1522_INIT_L + strlen(charset)
            + (2 * RFC1522_DLIM_L) + 1;

        if (len == 0)           /* no room at all, not even for a NUL */
            return (rv);

        while (*s) {
            size_t used = (size_t) (d - rv);

            /*
             * The header is written with the unbounded sstrcpy, so make
             * sure all of it fits, charset name included.  Testing
             * "used" first keeps the unsigned subtraction from wrapping.
             */
            if (used < len - 1 && hdr_len < len - 1 - used) {
                sstrcpy(&d, RFC1522_INIT);      /* insert intro header, */
                sstrcpy(&d, charset);   /* character set tag, */
                sstrcpy(&d, RFC1522_DLIM);      /* and encoding flavor */
                *d++ = enc;
                sstrcpy(&d, RFC1522_DLIM);
            }

            /*
             * feed lines to encoder such that they're guaranteed
             * less than RFC1522_MAXW.
             */
            p = rfc1522_encoded_word(s, enc, charset);
            if (p == s)         /* always consume something, or we spin */
                p = s + 1;

            /* insert encoded data */
            t = (enc == 'B') ? rfc1522_binary(s, p - s)
                             : rfc1522_8bit(s, p - s);
            sstrncpy(&d, t, (int) (len - 1 - (d - rv)));

            /* insert terminator */
            sstrncpy(&d, RFC1522_TERM, (int) (len - 1 - (d - rv)));
            fs_give((void **) &t);

            if (*p)             /* more src string follows */
                sstrncpy(&d, "\015\012 ", (int) (len - 1 - (d - rv)));  /* insert cont. line */

            s = p;              /* advance s */
        }

        /* sstrncpy does not terminate when it runs out of room */
        rv[len - 1] = '\0';
        return (rv);
    } else
        return ((char *) s);    /* no work for us here */
}



/*
 * rfc1522_encoded_word -- cut given string into max length encoded word
 *
 * s       - NUL-terminated text still to be encoded
 * enc     - 'B' for base64; anything else is treated as 'Q'
 * charset - charset name that will appear in the encoded-word
 *
 * Return: pointer into 's' such that the encoded 's' is no greater
 *	   than RFC1522_MAXW.  Unless s is empty the result is always
 *	   beyond s, even if the charset name alone uses up the budget, so
 *	   a caller looping over the text is certain to terminate.
 *
 *  NOTE: this line break code is NOT cognizant of any SI/SO
 *  charset requirements nor similar strategies using escape
 *  codes.  Hopefully this will matter little and such
 *  representation strategies don't also include 8bit chars.
 */
unsigned char *rfc1522_encoded_word(unsigned char *s, int enc,
                                    char *charset)
{
    unsigned char *start = s;
    int goal;

    /* work in int so a very long charset name gives a negative budget
     * rather than a wrapped-around unsigned one */
    goal = RFC1522_MAXW - (int) RFC1522_OVERHEAD(charset);

    if (enc == 'B') {           /* base64 encode */
        /* 4 output characters per 3 source bytes */
        for (goal = ((goal / 4) * 3) - 2; goal && *s; goal--, s++)
            ;
    } else {                    /* special 'Q' encoding */
        for (; goal && *s; s++)
            if ((goal -= RFC1522_ENC_CHAR(*s) ? 3 : 1) < 0)
                break;
    }

    if (s == start && *s)       /* budget too small for even one char */
        s++;

    return (s);
}



/*
 * rfc1522_8bit -- apply RFC 1522 'Q' encoding to the given 8bit buffer
 *
 * src  - bytes to encode; only the first slen bytes are ever read
 * slen - number of bytes at src (a negative count is treated as zero)
 *
 * A CR LF pair is copied through unchanged, a space becomes '_', a
 * character for which RFC1522_ENC_CHAR is true becomes '=' followed by
 * two hex digits, and everything else is copied as is.
 *
 * Return: alloc'd buffer containing encoded string (NUL-terminated,
 *	   obtained from fs_get)
 */
char *rfc1522_8bit(void *src, int slen)
{
    char *ret, *d;
    unsigned char c;
    unsigned char *s = (unsigned char *) src;

    if (slen < 0)
        slen = 0;

    /* worst case is three output characters per input byte, plus NUL */
    d = ret = (char *) fs_get((size_t) (3 * slen + 2));

    while (slen-- > 0) {        /* for each character */
        c = *s++;

        /* check that a byte is left before peeking at it for the LF */
        if (c == '\015' && slen > 0 && *s == '\012') {
            *d++ = '\015';      /* true line break */
            *d++ = *s++;
            slen--;
        } else if (c == SPACE) {        /* special encoding case */
            *d++ = '_';
        } else if (RFC1522_ENC_CHAR(c)) {
            *d++ = '=';         /* quote character */
            C2XPAIR(c, d);
        } else {
            *d++ = (char) c;    /* ordinary character */
        }
    }

    *d = '\0';                  /* tie off destination */
    return (ret);
}


/*
 * rfc1522_binary -- apply RFC 1522 'B' encoding to the given 8bit buffer
 *
 * src  - bytes to encode
 * srcl - number of bytes at src
 *
 * Every group of up to three input bytes yields four output characters;
 * a final group of one or two bytes is padded with '='.
 *
 * Return: alloc'd buffer containing encoded string (NUL-terminated,
 *	   obtained from fs_get)
 */
char *rfc1522_binary(void *src, int srcl)
{
    static char *alphabet =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    unsigned char *in = (unsigned char *) src;
    char *encoded = (char *) fs_get((size_t) ((((srcl + 2) / 3) * 4) + 1));
    char *out = encoded;

    while (srcl > 0) {
        int group = (srcl > 2) ? 3 : srcl;      /* bytes in this group */
        unsigned int b0 = in[0];
        unsigned int b1 = (group > 1) ? in[1] : 0;
        unsigned int b2 = (group > 2) ? in[2] : 0;

        /* char 1: high 6 bits of byte 1 */
        *out++ = alphabet[b0 >> 2];
        /* char 2: low 2 bits of byte 1, high 4 bits of byte 2 */
        *out++ = alphabet[((b0 << 4) | (b1 >> 4)) & 0x3f];
        /* char 3: low 4 bits of byte 2, high 2 bits of byte 3 */
        *out++ = (group > 1)
            ? alphabet[((b1 << 2) | (b2 >> 6)) & 0x3f] : '=';
        /* char 4: low 6 bits of byte 3 */
        *out++ = (group > 2) ? alphabet[b2 & 0x3f] : '=';

        in += group;
        srcl -= group;
    }

    *out = '\0';                /* tie off string */
    return (encoded);           /* return the resulting string */
}
