/*
 * Smart Common Input Method
 * 
 * Copyright (c) 2002 James Su <suzhe@turbolinux.com.cn>
 *
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA  02111-1307  USA
 *
 * $Id: scim_utility.cpp,v 1.21 2004/01/20 09:59:32 suzhe Exp $
 */

#define Uses_SCIM_UTILITY
#define Uses_C_LOCALE
#define Uses_C_ICONV
#define Uses_C_STDLIB

#include <langinfo.h>
#include <pwd.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdio.h>

#include "scim_private.h"
#include "scim.h"

namespace scim {

int
utf8_mbtowc (ucs4_t *pwc, const unsigned char *src, int src_len)
{
    if (!pwc)
        return 0;

    unsigned char c = src [0];

    if (c < 0x80) {
        *pwc = c;
        return 1;
    } else if (c < 0xc2) {
        return RET_ILSEQ;
    } else if (c < 0xe0) {
        if (src_len < 2)
            return RET_TOOFEW(0);
        if (!((src [1] ^ 0x80) < 0x40))
            return RET_ILSEQ;
        *pwc = ((ucs4_t) (c & 0x1f) << 6)
                 | (ucs4_t) (src [1] ^ 0x80);
        return 2;
    } else if (c < 0xf0) {
        if (src_len < 3)
            return RET_TOOFEW(0);
        if (!((src [1] ^ 0x80) < 0x40 && (src [2] ^ 0x80) < 0x40
                && (c >= 0xe1 || src [1] >= 0xa0)))
            return RET_ILSEQ;
        *pwc = ((ucs4_t) (c & 0x0f) << 12)
                 | ((ucs4_t) (src [1] ^ 0x80) << 6)
                 | (ucs4_t) (src [2] ^ 0x80);
        return 3;
    } else if (c < 0xf8) {
        if (src_len < 4)
            return RET_TOOFEW(0);
        if (!((src [1] ^ 0x80) < 0x40 && (src [2] ^ 0x80) < 0x40
                && (src [3] ^ 0x80) < 0x40
                && (c >= 0xf1 || src [1] >= 0x90)))
            return RET_ILSEQ;
        *pwc = ((ucs4_t) (c & 0x07) << 18)
                 | ((ucs4_t) (src [1] ^ 0x80) << 12)
                 | ((ucs4_t) (src [2] ^ 0x80) << 6)
                 | (ucs4_t) (src [3] ^ 0x80);
        return 4;
    } else if (c < 0xfc) {
        if (src_len < 5)
            return RET_TOOFEW(0);
        if (!((src [1] ^ 0x80) < 0x40 && (src [2] ^ 0x80) < 0x40
                && (src [3] ^ 0x80) < 0x40 && (src [4] ^ 0x80) < 0x40
                && (c >= 0xf9 || src [1] >= 0x88)))
            return RET_ILSEQ;
        *pwc = ((ucs4_t) (c & 0x03) << 24)
                 | ((ucs4_t) (src [1] ^ 0x80) << 18)
                 | ((ucs4_t) (src [2] ^ 0x80) << 12)
                 | ((ucs4_t) (src [3] ^ 0x80) << 6)
                 | (ucs4_t) (src [4] ^ 0x80);
        return 5;
    } else if (c < 0xfe) {
        if (src_len < 6)
            return RET_TOOFEW(0);
        if (!((src [1] ^ 0x80) < 0x40 && (src [2] ^ 0x80) < 0x40
                && (src [3] ^ 0x80) < 0x40 && (src [4] ^ 0x80) < 0x40
                && (src [5] ^ 0x80) < 0x40
                && (c >= 0xfd || src [1] >= 0x84)))
            return RET_ILSEQ;
        *pwc = ((ucs4_t) (c & 0x01) << 30)
                 | ((ucs4_t) (src [1] ^ 0x80) << 24)
                 | ((ucs4_t) (src [2] ^ 0x80) << 18)
                 | ((ucs4_t) (src [3] ^ 0x80) << 12)
                 | ((ucs4_t) (src [4] ^ 0x80) << 6)
                 | (ucs4_t) (src [5] ^ 0x80);
        return 6;
    } else
        return RET_ILSEQ;
}

int
utf8_wctomb (unsigned char *dest, ucs4_t wc, int dest_size)
{
    if (!dest)
        return 0;

    int count;
    if (wc < 0x80)
        count = 1;
    else if (wc < 0x800)
        count = 2;
    else if (wc < 0x10000)
        count = 3;
    else if (wc < 0x200000)
        count = 4;
    else if (wc < 0x4000000)
        count = 5;
    else if (wc <= 0x7fffffff)
        count = 6;
    else
        return RET_ILSEQ;
    if (dest_size < count)
        return RET_TOOSMALL;
    switch (count) { /* note: code falls through cases! */
        case 6: dest [5] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x4000000;
        case 5: dest [4] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x200000;
        case 4: dest [3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
        case 3: dest [2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
        case 2: dest [1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
        case 1: dest [0] = wc;
    }
    return count;
}

ucs4_t
utf8_read_wchar (std::istream &is)
{
    unsigned char utf8[6];
    ucs4_t wc;
    int count;
    for (int i=0; i<6; ++i) {
        is.read ((char*)(utf8+i), sizeof(unsigned char));
        if ((count=utf8_mbtowc (&wc, utf8, i+1)) > 0) 
            return wc;
        if (count == RET_ILSEQ)
            return 0;
    }
    return 0;
}

WideString
utf8_read_wstring (std::istream &is, ucs4_t delim, bool rm_delim)
{
    WideString str;
    ucs4_t wc;
    while ((wc = utf8_read_wchar (is)) > 0) {
        if (wc != delim)
            str.push_back (wc);
        else {
            if (!rm_delim)
                str.push_back (wc);
            break;
        }
    }
    return str;
}

std::ostream &
utf8_write_wchar (std::ostream &os, ucs4_t wc)
{
    unsigned char utf8[6];
    int count = 0;

    if ((count=utf8_wctomb (utf8, wc, 6)) > 0)
        os.write ((char*)utf8, count * sizeof (unsigned char));

    return os;
}

std::ostream &
utf8_write_wstring (std::ostream &os, const WideString & wstr)
{
    for (unsigned int i=0; i<wstr.size (); ++i)
        utf8_write_wchar (os, wstr [i]);

    return os;
}

WideString
utf8_mbstowcs (const String & str)
{
    WideString wstr;
    ucs4_t wc;
    unsigned int sn = 0;
    int un = 0;

    const unsigned char *s = (const unsigned char *) str.c_str ();

    while (sn < str.length () && *s != 0 &&
            (un=utf8_mbtowc (&wc, s, str.length () - sn)) > 0) {
        wstr.push_back (wc);
        s += un;
        sn += un;
    }
    return wstr;
}

WideString
utf8_mbstowcs (const char *str, int len)
{
    WideString wstr;

    if (str) {
        ucs4_t wc;
        unsigned int sn = 0;
        int un = 0;

        if (len < 0) len = strlen (str);

        while (sn < len && *str != 0 && (un=utf8_mbtowc (&wc, (const unsigned char *)str, len - sn)) > 0) {
            wstr.push_back (wc);
            str += un;
            sn += un;
    
        }
    }
    return wstr;
}

String
utf8_wcstombs (const WideString & wstr)
{
    String str;
    char utf8 [6];
    int un = 0;

    for (unsigned int i = 0; i<wstr.size (); ++i) {
        un = utf8_wctomb ((unsigned char*)utf8, wstr [i], 6);
        if (un > 0)
            str.append (utf8, un);
    }
    return str;
}

String
utf8_wcstombs (const ucs4_t *wstr, int len)
{
    String str;
    char utf8 [6];
    int un = 0;

    if (wstr) {
        if (len < 0)
            for (len = 0; wstr [len]; ++len) NULL;

        for (int i = 0; i < len; ++i) {
            un = utf8_wctomb ((unsigned char*)utf8, wstr [i], 6);
            if (un > 0)
                str.append (utf8, un);
        }
    }
    return str;
}

String
scim_validate_locale (const String& locale)
{
    String good;

    String last = String (setlocale (LC_CTYPE, 0));

    if (setlocale (LC_CTYPE, locale.c_str ())) {
        good = locale;
    } else {
        std::vector<String> vec;
        if (scim_split_string_list (vec, locale, '.') == 2) {
            if (isupper (vec[1][0])) {
                for (String::iterator i=vec[1].begin (); i!=vec[1].end (); ++i) 
                    *i = (char) tolower (*i);
            } else {
                for (String::iterator i=vec[1].begin (); i!=vec[1].end (); ++i) 
                    *i = (char) toupper (*i);
            }
            if (setlocale (LC_CTYPE, (vec[0] + "." + vec[1]).c_str ())) {
                good = vec [0] + "." + vec[1];
            }
        }
    }

    setlocale (LC_CTYPE, last.c_str ());

    return good;
}

String
scim_get_locale_encoding (const String& locale)
{
    String last = String (setlocale (LC_CTYPE, 0));
    String encoding;

    if (setlocale (LC_CTYPE, locale.c_str ()))
        encoding = String (nl_langinfo (CODESET));
    else {
        std::vector<String> vec;
        if (scim_split_string_list (vec, locale, '.') == 2) {
            if (isupper (vec[1][0])) {
                for (String::iterator i=vec[1].begin (); i!=vec[1].end (); ++i) 
                    *i = (char) tolower (*i);
            } else {
                for (String::iterator i=vec[1].begin (); i!=vec[1].end (); ++i) 
                    *i = (char) toupper (*i);
            }
            if (setlocale (LC_CTYPE, (vec[0] + "." + vec[1]).c_str ()))
                encoding = String (nl_langinfo (CODESET));
        }
        
    }

    setlocale (LC_CTYPE, last.c_str ());

    return encoding;
}

int
scim_get_locale_maxlen (const String& locale)
{
    int maxlen;

    String last = String (setlocale (LC_CTYPE, 0));

    if (setlocale (LC_CTYPE, locale.c_str ()))
        maxlen = MB_CUR_MAX;
    else
        maxlen = 1;

    setlocale (LC_CTYPE, last.c_str ());
    return maxlen;
}

int
scim_split_string_list (std::vector<String>& vec, const String& str, char delim)
{
    int count = 0;

    String temp;
    String::const_iterator bg, ed;

    vec.clear ();

    bg = str.begin ();
    ed = str.begin ();

    while (bg != str.end () && ed != str.end ()) {
        for (; ed != str.end (); ++ed) {
            if (*ed == delim)
                break;
        }
        temp.assign (bg, ed);
        vec.push_back (temp);
        ++count;

        if (ed != str.end ())
            bg = ++ ed;
    }
    return count;
}

String
scim_combine_string_list (const std::vector<String>& vec, char delim)
{
    String result;
    for (std::vector<String>::const_iterator i = vec.begin (); i!=vec.end (); ++i) {
        result += *i;
        if (i+1 != vec.end ())
            result += delim;
    }
    return result;
}

bool
scim_if_wchar_ucs4_equal ()
{
    if (sizeof (wchar_t) != sizeof (ucs4_t))
        return false;

    iconv_t cd;
    wchar_t wcbuf [2] = {0,0};
    ucs4_t  ucsbuf [2] = {0x4E00, 0x0001};
    size_t  wclen = 2 * sizeof (wchar_t);
    size_t  ucslen = 2 * sizeof (ucs4_t);

    char *wcp = (char *) wcbuf;
    ICONV_CONST char *ucsp = (ICONV_CONST char *) ucsbuf;

    if (scim_is_little_endian ())
        cd = iconv_open ("UCS-4LE", "wchar_t");
    else
        cd = iconv_open ("UCS-4BE", "wchar_t");
    
    if (cd == (iconv_t) -1)
        return false;

    iconv (cd, &ucsp, &ucslen, &wcp, &wclen);

    iconv_close (cd);

    if (wcbuf [0] == (wchar_t) ucsbuf [0] &&
        wcbuf [1] == (wchar_t) ucsbuf [1])
        return true;

    return false;
}

struct {
    ucs4_t half;
    ucs4_t full;
    ucs4_t size;
} __half_full_table [] = {
    {0x0020, 0x3000, 1},
    {0x0021, 0xFF01, 0x5E},
    {0x00A2, 0xFFE0, 2},
    {0x00A5, 0xFFE5, 1},
    {0x00A6, 0xFFE4, 1},
    {0x00AC, 0xFFE2, 1},
    {0x00AF, 0xFFE3, 1},
    {0x20A9, 0xFFE6, 1},
    {0xFF61, 0x3002, 1},
    {0xFF62, 0x300C, 2},
    {0xFF64, 0x3001, 1},
    {0xFF65, 0x30FB, 1},
    {0xFF66, 0x30F2, 1},
    {0xFF67, 0x30A1, 1},
    {0xFF68, 0x30A3, 1},
    {0xFF69, 0x30A5, 1},
    {0xFF6A, 0x30A7, 1},
    {0xFF6B, 0x30A9, 1},
    {0xFF6C, 0x30E3, 1},
    {0xFF6D, 0x30E5, 1},
    {0xFF6E, 0x30E7, 1},
    {0xFF6F, 0x30C3, 1},
    {0xFF70, 0x30FC, 1},
    {0xFF71, 0x30A2, 1},
    {0xFF72, 0x30A4, 1},
    {0xFF73, 0x30A6, 1},
    {0xFF74, 0x30A8, 1},
    {0xFF75, 0x30AA, 2},
    {0xFF77, 0x30AD, 1},
    {0xFF78, 0x30AF, 1},
    {0xFF79, 0x30B1, 1},
    {0xFF7A, 0x30B3, 1},
    {0xFF7B, 0x30B5, 1},
    {0xFF7C, 0x30B7, 1},
    {0xFF7D, 0x30B9, 1},
    {0xFF7E, 0x30BB, 1},
    {0xFF7F, 0x30BD, 1},
    {0xFF80, 0x30BF, 1},
    {0xFF81, 0x30C1, 1},
    {0xFF82, 0x30C4, 1},
    {0xFF83, 0x30C6, 1},
    {0xFF84, 0x30C8, 1},
    {0xFF85, 0x30CA, 6},
    {0xFF8B, 0x30D2, 1},
    {0xFF8C, 0x30D5, 1},
    {0xFF8D, 0x30D8, 1},
    {0xFF8E, 0x30DB, 1},
    {0xFF8F, 0x30DE, 5},
    {0xFF94, 0x30E4, 1},
    {0xFF95, 0x30E6, 1},
    {0xFF96, 0x30E8, 6},
    {0xFF9C, 0x30EF, 1},
    {0xFF9D, 0x30F3, 1},
    {0xFFA0, 0x3164, 1},
    {0xFFA1, 0x3131, 30},
    {0xFFC2, 0x314F, 6},
    {0xFFCA, 0x3155, 6},
    {0xFFD2, 0x315B, 9},
    {0xFFE9, 0x2190, 4},
    {0xFFED, 0x25A0, 1},
    {0xFFEE, 0x25CB, 1},
    {0,0,0}
};


/**
 * convert a half width unicode char to full width char
 */
ucs4_t
scim_wchar_to_full_width (ucs4_t code)
{
    int i=0;
    while (__half_full_table [i].size) {
        if (code >= __half_full_table [i].half &&
            code <  __half_full_table [i].half +
                    __half_full_table [i].size)
            return __half_full_table [i].full +
                    (code - __half_full_table [i].half);
        ++ i;
    }
    return code;
}

/**
 * convert a full width unicode char to half width char
 */
ucs4_t
scim_wchar_to_half_width (ucs4_t code)
{
    int i=0;
    while (__half_full_table [i].size) {
        if (code >= __half_full_table [i].full &&
            code <  __half_full_table [i].full +
                    __half_full_table [i].size)
            return __half_full_table [i].half +
                    (code - __half_full_table [i].full);
        ++ i;
    }
    return code;
}

String
scim_get_home_dir ()
{
    const char * home_dir = 0;

    struct passwd *pw;

    setpwent ();
    pw = getpwuid (getuid ());
    endpwent ();

    if (pw) {
        home_dir = pw->pw_dir;
    }

    if (!home_dir) {
        home_dir = getenv ("HOME");
    }

    return String (home_dir);
}

String
scim_get_user_name ()
{
    struct passwd *pw;
    const char *user_name;

    setpwent ();
    pw = getpwuid (getuid ());
    endpwent ();

    if (pw && pw->pw_name)
        return String (pw->pw_name);
    else if ((user_name = getenv ("USER")) != NULL)
        return String (user_name);

    char uid_str [10];

    snprintf (uid_str, 10, "%u", getuid ());

    return String (uid_str);
}

String
scim_get_current_locale ()
{
    return String (setlocale (LC_CTYPE, 0));
}

bool
scim_is_little_endian ()
{
    short endian = 1;
    return (*((char *)&endian) != 0);
}

size_t
scim_load_file (const String &filename, char **bufptr)
{
    if (!filename.length ())
        return 0;

    struct stat statbuf;

    if (stat (filename.c_str (), &statbuf) < 0 ||
        !S_ISREG (statbuf.st_mode) ||
        !statbuf.st_size)
        return 0;

    if (!bufptr)
        return statbuf.st_size;

    FILE *fp = fopen (filename.c_str (), "r");

    if (fp == NULL) {
        *bufptr = 0;
        return 0;
    }

    try {
        *bufptr = new char [statbuf.st_size];
    } catch (...) {
        fclose (fp);
        throw;
    }

    if (! (*bufptr)) {
        fclose (fp);
        return 0;
    }

    size_t size = fread (*bufptr, 1, statbuf.st_size, fp);

    fclose (fp);

    if (!size) {
        delete [] *bufptr;
        *bufptr = 0;
    }

    return size;
}

bool
scim_make_dir (const String &dir)
{
    std::vector <String> paths;
    String path;

    scim_split_string_list (paths, dir, SCIM_PATH_DELIM);
 
    for (size_t i = 0; i < paths.size (); ++i) {
        path += SCIM_PATH_DELIM_STRING + paths [i];

        //Make the dir if it's not exist.
        if (access (path.c_str (), R_OK) != 0) {
            mkdir (path.c_str (), S_IRUSR | S_IWUSR | S_IXUSR);
            if (access (path.c_str (), R_OK) != 0)
                return false;
        }
    }
    return true;
}

}
/*
vi:ts=4:ai:nowrap:expandtab
*/
