/* This file is part of the KDE project
 * Copyright (C) 2007 Montel Laurent <montel@kde.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#define STRIGI_IMPORT_API
#include <strigi/streamthroughanalyzer.h>
#include <strigi/analyzerplugin.h>
#include <strigi/fieldtypes.h>
#include <strigi/analysisresult.h>

//kde include
#include <KUrl>
#include <kcodecs.h>
//qt include
#include <QFile>
#include <QTextCodec>
#include <QTextStream>


using namespace std;
using namespace Strigi;

class mHtmlThroughAnalyzerFactory;
class mHtmlThroughAnalyzer : public StreamThroughAnalyzer {
    private:
        const mHtmlThroughAnalyzerFactory* factory;
        AnalysisResult* idx;

        void setIndexable( AnalysisResult *i ) {
            idx = i;
        }
        const char* name() const {
            return "mHtmlThroughAnalyzer";
        }

        InputStream* connectInputStream( InputStream *in );
        bool isReadyWithStream() { return true; }
    QString decodeRFC2047Phrase(const QString &msg, bool removeLessGreater=true);
    QString decodeRFC2047String(const QString &msg);
public:
        mHtmlThroughAnalyzer( const mHtmlThroughAnalyzerFactory* f ) : factory( f ) {}
};

/**
 * Factory for mHtmlThroughAnalyzer instances. It also owns the names of the
 * indexed fields and the handles obtained when they are registered.
 */
class mHtmlThroughAnalyzerFactory : public StreamThroughAnalyzerFactory {
public:
    // Field handles assigned in registerFields(); the analyzer reads them
    // when it adds header values to a result.
    const RegisteredField* subjectField;
    const RegisteredField* senderField;
    const RegisteredField* recipientField;
    const RegisteredField* copyToField;
    const RegisteredField* blindCopyToField;
    const RegisteredField* dateField;

private:
    // Names under which the fields above are registered.
    static const std::string subjectFieldName;
    static const std::string senderFieldName;
    static const std::string recipientFieldName;
    static const std::string copyToFieldName;
    static const std::string blindCopyToFieldName;
    static const std::string dateFieldName;

    // Name reported for this analyzer type.
    const char* name() const { return "mHtmlThroughAnalyzer"; }

    // Creates a new analyzer that refers back to this factory.
    StreamThroughAnalyzer* newInstance() const { return new mHtmlThroughAnalyzer(this); }

    // Registers all header fields and stores the returned handles.
    void registerFields( FieldRegister& );
};

// Names of the fields the analyzer fills in: one per mail header it extracts.
const std::string mHtmlThroughAnalyzerFactory::subjectFieldName = "email.subject";
const std::string mHtmlThroughAnalyzerFactory::senderFieldName = "email.from";
const std::string mHtmlThroughAnalyzerFactory::recipientFieldName = "email.to";
const std::string mHtmlThroughAnalyzerFactory::copyToFieldName = "email.cc";
const std::string mHtmlThroughAnalyzerFactory::blindCopyToFieldName = "email.bcc";
const std::string mHtmlThroughAnalyzerFactory::dateFieldName = "date";

/**
 * Registers the six header fields with \a reg and keeps the returned handles
 * in the public members so that the analyzer can refer to them later.
 * All fields are registered as string fields with identical trailing
 * arguments (1, 0).
 */
void mHtmlThroughAnalyzerFactory::registerFields( FieldRegister& reg ) {
    subjectField     = reg.registerField( subjectFieldName,     FieldRegister::stringType, 1, 0 );
    senderField      = reg.registerField( senderFieldName,      FieldRegister::stringType, 1, 0 );
    recipientField   = reg.registerField( recipientFieldName,   FieldRegister::stringType, 1, 0 );
    copyToField      = reg.registerField( copyToFieldName,      FieldRegister::stringType, 1, 0 );
    blindCopyToField = reg.registerField( blindCopyToFieldName, FieldRegister::stringType, 1, 0 );
    dateField        = reg.registerField( dateFieldName,        FieldRegister::stringType, 1, 0 );
}

InputStream* mHtmlThroughAnalyzer::connectInputStream( InputStream* in ) {
    const string& path = idx->path();
    QString mSender;
    QString mRecipient;
    QString mCopyTo;
    QString mBlindCopyTo;
    QString mSubject;
    QString mDate;
    bool canUnfold=false;
    QFile f(path.c_str());
    if (!f.open(IO_ReadOnly))
        return in;
    QTextStream stream(&f);
    QString l=stream.readLine();
    int nFieldsFound = 0;
    while(!l.isEmpty()){
        if(l.startsWith("From: ")) {
            mSender=l.mid(6);
            nFieldsFound |= 1;
            canUnfold=true;
        } else if(l.startsWith("To: ")) {
            mRecipient=l.mid(4);
            nFieldsFound |= 2;
            canUnfold=true;
        } else if(l.startsWith("Subject: ")) {
            mSubject=l.mid(9);
            nFieldsFound |= 4;
            canUnfold=true;
        } else if(l.startsWith("Cc: ")) {
            mCopyTo=l.mid(4);
            nFieldsFound |= 8;
            canUnfold=true;
        } else if(l.startsWith("Bcc: ")) {
            mBlindCopyTo=l.mid(5);
            nFieldsFound |= 16;
            canUnfold=true;
        } else if(l.startsWith("Date: ")) {
            mDate=l.mid(6);
            nFieldsFound |= 32;
            canUnfold=false;
        }else if(l.startsWith(" ") || l.startsWith("\t")){
            // unfold field
            if(canUnfold){
                QString tmp=l.trimmed();
                if(nFieldsFound & 16) mBlindCopyTo=mBlindCopyTo+' '+tmp;
                else if(nFieldsFound & 8) mCopyTo=mCopyTo+' '+tmp;
                else if(nFieldsFound & 4) mSubject=mSubject+' '+tmp;
                else if(nFieldsFound & 2) mRecipient=mRecipient+' '+tmp;
                else if(nFieldsFound & 1) mSender=mSender+' '+tmp;
            }
        }else canUnfold=false;
        // break out of the loop once the six fields have been found
        if ( nFieldsFound == 32+16+8+4+2+1 )
            break;
        l=stream.readLine();
    }
    f.close();

    idx->addValue( factory->subjectField, (const char*)decodeRFC2047Phrase(mSubject,false).toUtf8() );
    idx->addValue( factory->senderField, (const char*)decodeRFC2047Phrase(mSender).toUtf8() );
    idx->addValue( factory->recipientField, (const char*)decodeRFC2047Phrase(mRecipient).toUtf8() );
    idx->addValue( factory->copyToField, (const char*)decodeRFC2047Phrase(mCopyTo).toUtf8());
    idx->addValue( factory->blindCopyToField, (const char*)decodeRFC2047Phrase(mBlindCopyTo).toUtf8());
    idx->addValue( factory->dateField, (const char*)mDate.toUtf8() );
    return in;
}

/**
 * Decodes every RFC 2047 encoded word ("=?charset?encoding?text?=") in the
 * header value \a msg and returns the value with the decoded text in place.
 *
 * An encoded word starts at "=?" and is taken to end at the first "?=" that
 * is followed by a space, by "\" " or by the end of the text. Anything that
 * is not part of such a complete encoded word is copied through unchanged,
 * including a trailing "=?" sequence that is never terminated.
 *
 * \param msg               raw header value
 * \param removeLessGreater when true, angle brackets are removed if one pair
 *                          encloses the whole trimmed value, or around parts
 *                          that start the value or directly follow ", "
 * \return the decoded header value
 */
QString mHtmlThroughAnalyzer::decodeRFC2047Phrase(const QString &msg, bool removeLessGreater){
    QString decodedText;
    // Part of the input that still has to be scanned. Starting from the whole
    // message (instead of slicing at a possibly negative index) keeps text
    // without any encoded word from being emitted twice.
    QString encodedText=msg;
    int st=encodedText.indexOf("=?");
    while(st!=-1){
        // Look for the terminating "?=". A "?=" inside the payload (e.g. the
        // start of "?=41" in Q encoding) does not qualify, so keep searching,
        // but give up once there are no candidates left.
        int en=encodedText.indexOf("?=",st+2);
        while(en!=-1){
            const int after=en+2;
            if(after>=encodedText.length()
               || encodedText.mid(after,1)==" "
               || encodedText.mid(after,2)=="\" ") break;
            en=encodedText.indexOf("?=",en+1);
        }
        if(en==-1) break;
        // plain text in front of the encoded word, then the decoded word
        decodedText+=encodedText.left(st);
        decodedText+=decodeRFC2047String(encodedText.mid(st,en-st+2));
        encodedText=encodedText.mid(en+2);
        st=encodedText.indexOf("=?");
    }
    decodedText+=encodedText;

    // remove unwanted '<' and '>'
    if(removeLessGreater){
        const QString trimmed=decodedText.trimmed();
        if(trimmed.startsWith("<") && trimmed.endsWith(">") && trimmed.indexOf("<",1)==-1){
            // a single pair of brackets around the whole value
            decodedText=trimmed.mid(1,trimmed.length()-2);
        }else{
            QString dec=decodedText;
            st=dec.indexOf("<");
            while(st!=-1){
                if(st==0 || dec.mid(st-2,2)==", "){
                    const int en=dec.indexOf(">",st);
                    // Without a closing '>' neither this '<' nor any later
                    // one can be matched, so leave the rest untouched.
                    if(en==-1) break;
                    dec=dec.left(st)+dec.mid(st+1,en-st-1)+dec.mid(en+1);
                    st=dec.indexOf("<",st);
                }else{
                    // '<' that follows a display name: keep it
                    st=dec.indexOf("<",st+1);
                }
            }
            decodedText=dec;
        }
    }
    return decodedText;
}

/**
 * Decodes a single RFC 2047 encoded word of the form
 * "=?charset?encoding?text?=", where encoding is "b" (base64) or "q"
 * (quoted-printable like), compared case-insensitively.
 *
 * Text following the last "?=" in \a msg is appended to the result
 * unchanged. \a msg is returned as is when it does not start with "=?",
 * contains no "?=", is not of the form described above, uses an unknown
 * encoding letter, or names a charset for which no text codec is available.
 *
 * \param msg the encoded word, optionally followed by plain text
 * \return the decoded text, or \a msg if it cannot be decoded
 */
QString mHtmlThroughAnalyzer::decodeRFC2047String(const QString &msg){
    const int encEnd=msg.lastIndexOf("?=");
    if(!msg.startsWith("=?") || encEnd==-1) return msg;

    const QString notEncodedText=msg.mid(encEnd+2);
    // everything between the leading "=?" and the final "?="
    QString encodedText=msg.left(encEnd).mid(2);

    const int questionMark=encodedText.indexOf('?');
    if(questionMark==-1) return msg;
    const QString charset=encodedText.left(questionMark).toLower();
    const QString encoding=encodedText.mid(questionMark+1,1).toLower();
    if(encoding!="b" && encoding!="q") return msg;
    // the encoding letter has to be followed by the '?' that separates it
    // from the payload
    if(encodedText.mid(questionMark+2,1)!="?") return msg;
    encodedText=encodedText.mid(questionMark+3);
    // NOTE(review): this only rejects the word when both the charset and the
    // payload contain a space; RFC 2047 allows no spaces in either, so "||"
    // may have been intended -- confirm before changing.
    if(charset.indexOf(" ")!=-1 && encodedText.indexOf(" ")!=-1) return msg;

    QByteArray tmpIn=encodedText.toLocal8Bit();
    QByteArray tmpOut;
    if(encoding=="q"){
        // In the Q encoding '_' stands for a space. It has to be translated
        // before decoding so that an encoded "=5F" stays an underscore; the
        // B encoding has no such rule.
        tmpIn.replace('_',' ');
        tmpOut=KCodecs::quotedPrintableDecode(tmpIn);
    }else tmpOut=KCodecs::base64Decode(tmpIn);

    QString decodedText;
    if(charset!="us-ascii"){
        QTextCodec *codec=QTextCodec::codecForName(charset.toLocal8Bit());
        if(!codec) return msg;
        decodedText=codec->toUnicode(tmpOut);
    }else decodedText=tmpOut;
    return decodedText + notEncodedText;
}

class Factory : public AnalyzerFactoryFactory {
public:
    std::list<StreamThroughAnalyzerFactory*>
    streamThroughAnalyzerFactories() const {
        std::list<StreamThroughAnalyzerFactory*> af;
        af.push_back(new mHtmlThroughAnalyzerFactory());
        return af;
    }
};

STRIGI_ANALYZER_FACTORY(Factory)

