# -*- coding: utf-8 -*-

# ==============================================================================
# COPYRIGHT (C) 1991 - 2003  EDF R&D                  WWW.CODE-ASTER.ORG
# THIS PROGRAM IS FREE SOFTWARE; YOU CAN REDISTRIBUTE IT AND/OR MODIFY
# IT UNDER THE TERMS OF THE GNU GENERAL PUBLIC LICENSE AS PUBLISHED BY
# THE FREE SOFTWARE FOUNDATION; EITHER VERSION 2 OF THE LICENSE, OR
# (AT YOUR OPTION) ANY LATER VERSION.
#
# THIS PROGRAM IS DISTRIBUTED IN THE HOPE THAT IT WILL BE USEFUL, BUT
# WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED WARRANTY OF
# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. SEE THE GNU
# GENERAL PUBLIC LICENSE FOR MORE DETAILS.
#
# YOU SHOULD HAVE RECEIVED A COPY OF THE GNU GENERAL PUBLIC LICENSE
# ALONG WITH THIS PROGRAM; IF NOT, WRITE TO EDF R&D CODE_ASTER,
#    1 AVENUE DU GENERAL DE GAULLE, 92141 CLAMART CEDEX, FRANCE.
# ==============================================================================

"""
This module gives functions to manipulate Code_Aster jobs :
- return informations from astk server configuration,
- edit output or error file,
- get job status,
- kill and delete the job files,
- search strings in output of a job,
- purge 'flasheur' directory.
The functions are called by an ASTER_RUN object.
"""

import os
import os.path as osp
import signal
import glob
import re

from asrun.common.i18n   import _
from asrun.common.rcfile import get_nodepara
from asrun.mystring      import print3, convert, ufmt
from asrun.profil        import ASTER_PROFIL
from asrun.system        import shell_cmd
from asrun.common_func   import get_tmpname
from asrun.common.utils  import YES_VALUES
from asrun.common.sysutils import is_localhost, is_localdisplay


def SetParser(run):
    """Declare the job-related actions and options on the command-line parser.

    run : ASTER_RUN object which manages the execution.

    Each action is described by the function to call ('method'), the
    arguments it expects ('syntax') and a help message ('help'). Each option
    is described by the positional and keyword arguments of the parser
    option. Everything is handed over to `run.SetActions`.
    """
    # (action name, callback, syntax, help message)
    actions = [
        ('info', Info, '',
            _(u'returns informations from astk server configuration : '\
                    'batch, interactive (yes/no, limits), compute nodes, '\
                    'versions')),
        ('actu', Actu, 'job_number job_name mode',
            _(u'returns the state, diagnosis, execution node, spent '\
                    'cpu time and working directory of a job')),
        ('edit', Edit, 'job_number job_name mode output|error DISPLAY',
            _(u'opens output or error file on the provided display')),
        ('del', Del, 'job_number job_name mode [node] [--signal=...]',
            _(u'kill a job and delete related files')),
        ('purge_flash', Purge, 'job_number1 [job_number2 [...]]]',
            _(u'delete files of jobs which are NOT in the list')),
        ('tail', Tail, 'job_number job_name mode fdest nb_lines [regexp]',
            _(u'output the last part of fort.6 file or filter lines ' \
                    'matching a pattern')),
    ]
    acts_descr = {}
    for name, callback, syntax, text in actions:
        acts_descr[name] = {
            'method' : callback,
            'syntax' : syntax,
            'help'   : text,
        }

    # option to choose the signal sent by the 'del' action
    signal_kwargs = {
        'action'  : 'store',
        'default' : 'KILL',
        'choices' : ('KILL', 'USR1'),
        'dest'    : 'signal',
        'help'    : _(u'signal to the job (KILL|USR1)'),
    }
    # option to print the result instead of writing/opening a file
    stdout_kwargs = {
        'action'  : 'store_true',
        'default' : False,
        'dest'    : 'result_to_output',
        'help'    : _(u'writes result to stdout instead of FILE'),
    }
    opts_descr = {
        'signal'           : { 'args' : ('--signal', ),
                               'kwargs' : signal_kwargs },
        'result_to_output' : { 'args' : ('--result_to_output', ),
                               'kwargs' : stdout_kwargs },
    }

    run.SetActions(
            actions_descr = acts_descr,
            actions_order = ['info', 'actu', 'edit', 'tail', 'del', 'purge_flash'],
            group_options=True,
            group_title='Options for operations on jobs',
            actions_group_title=False,
            options_descr = opts_descr,
    )


def Info(run, *args):
    """Print information from the astk server configuration.

    run  : ASTER_RUN object which manages the execution
    args : must be empty, otherwise a parser error is reported

    The result is printed as one text made of sections, each one enclosed
    between a '@NAME@' and a '@FINNAME@' marker line: server version,
    platform and batch/interactive limits, Code_Aster versions, default
    version, compute nodes, optionally the batch queue groups and, when
    'rep_agla' is not 'local', the reference-machine data (REX settings and
    the role of the current user read from the 'identAster' file). The
    message of the day comes last.
    """
    if len(args) > 0:
        run.parser.error(_(u"'--%s' requires no argument") % run.current_action)

    on_machref = run.get('rep_agla', 'local') != 'local'

    # astk server version: each numeric field is padded on two digits,
    # the raw version string is used if a field is not an integer
    l_v = run.__version__.split('.')
    try:
        svers = '.'.join(['%02d' % int(i) for i in l_v])
    except ValueError:
        svers = run.__version__
    output = ["@SERV_VERS@", svers, "@FINSERV_VERS@"]

    # all these parameters are necessary
    output.append("@PARAM@")
    output.extend(['%s : %s' % (p, run[p]) for p in ('plate-forme', 'batch',
        'batch_memmax', 'batch_tpsmax', 'batch_nbpmax', 'interactif',
        'interactif_memmax', 'interactif_tpsmax', 'interactif_nbpmax')])
    output.append("@FINPARAM@")

    # Code_Aster versions (only the last component of each path)
    output.append("@VERSIONS@")
    lvers = [osp.basename(v) for v in run.get('vers', '').split()]
    output.extend(['vers : %s' % v for v in lvers])
    output.append("@FINVERSIONS@")

    output.append("@DEFAULT_VERSION@")
    output.append('default_vers : %s' % osp.basename(run.get('default_vers', '')))
    output.append("@FINDEFAULT_VERSION@")

    output.append("@NOEUDS@")
    output.extend(['noeud : %s' % n for n in run.get('noeud', '').split()])
    output.append("@FINNOEUDS@")

    # batch scheduler information: only the groups for which at least one
    # queue is defined ('batch_queue_<group>') are returned
    if run['batch'] in YES_VALUES:
        l_queue_group = run.get('batch_queue_group', '').split()
        l_group = []
        for group in l_queue_group:
            l_cl = run.get('batch_queue_%s' % group, '').split()
            if len(l_cl) > 0:
                l_group.append(group)
        if len(l_group) > 0:
            output.extend(["@QUEUE_INFO@", ' '.join(l_group), "@FINQUEUE_INFO@"])

    if on_machref:
        output.extend(["@MACHREF@", "@FINMACHREF@"])
        output.extend(["@REX_URL@", run.get('rex_url', ''), "@FINREX_URL@"])
        output.extend(["@REX_REPFICH@", run.get('rep_rex', ''), "@FINREX_REPFICH@"])
        output.extend(["@MAIL_ATA@", run.get('mail_ata', ''), "@FINMAIL_ATA@"])

        # read identAster file
        f_ident = os.path.join(run['rep_identAster'], 'identAster')
        if not os.path.isfile(f_ident):
            run.Sortie(4)
        f = open(f_ident, 'r')
        try:
            # lines starting with '%' (optionally preceded by spaces) are
            # skipped, tabulations are treated as spaces
            l_ident = [line.strip().replace('\t', ' ') \
                        for line in f if re.search(r'^[ ]*%', line) is None]
        finally:
            f.close()
        # records are indexed by their second field, which is compared to
        # the login of the current user below
        d_user = {}
        for record in l_ident:
            l_i = record.split()
            if len(l_i) < 2:
                continue
            d_user[l_i[1]] = l_i

        # extract user info ('UA' is the role used when the user is unknown
        # or when its record has less than 6 fields)
        role = 'UA'
        details = ''
        user = run.system.getuser_host()[0]
        if user in d_user:
            fields = d_user[user]
            if len(fields) >= 6:
                role = fields[5].replace('/', ' ')
                details = ('%s %s %s %s' % (fields[0], fields[1],
                                           fields[2], fields[3])).replace('/', ' ')

        output.extend(["@ROLE@", role, "@FINROLE@"])
        if details:
            output.extend(["@INFOID@", details, "@FININFOID@"])

    # message of the day
    output.append("@MOTD@")
    if run.get('motd', '') != '' and os.path.exists(run['motd']):
        f_motd = open(run['motd'], 'r')
        try:
            output.append(f_motd.read())
        finally:
            f_motd.close()
    output.append("@FINMOTD@")

    print3(os.linesep.join(output))


def _sumtps(l_scpu):
    """Return the sum in seconds of the times given as strings in `l_scpu`.
    Format of each times in [dd:[hh:[mm:[ss]]]]
    """
    tot = 0
    if len(l_scpu) < 1:
        tot = '_'
    for scpu in l_scpu:
        try:
            s = [int(s) for s in scpu.split(':')]
        except ValueError:
            s = []
        if   len(s) == 4:
            tot += 86400*s[0]+3600*s[1]+60*s[2]+s[3]
        elif len(s) == 3:
            tot += 3600*s[0]+60*s[1]+s[2]
        elif len(s) == 2:
            tot += 60*s[0]+s[1]
        elif len(s) == 1:
            tot += s[0]
    return tot


def _read_text(path):
    """Return the whole content of the text file `path`.

    The file is closed even if reading fails.
    """
    fobj = open(path, 'r')
    try:
        return fobj.read()
    finally:
        fobj.close()


def Func_actu(run, *args):
    """Return state, diagnosis, node, cpu time, working directory and queue
    of a job.

    run  : ASTER_RUN object which manages the execution
    args : exactly three values (job_number, job_name, mode), where mode is
           "batch" or "interactif"

    The returned value is the tuple (etat, diag, node, tcpu, wrk, queue).
    A value which could not be determined is the string '_'.

    Steps :
    1. the batch scheduler (LSF, PBS or Sun Grid Engine) is queried in batch
       mode ; in interactive mode the node is read from the astk profile
       stored in the 'flasheur' directory,
    2. the processes of the node are listed with the 'ps_cpu' command,
    3. if the job is ended the diagnosis is read from the 'flasheur' files,
       else the working directory and the cpu time are computed.
    """
    if len(args) != 3:
        run.parser.error(_(u"'--%s' takes exactly %d arguments (%d given)") % \
            (run.current_action, 3, len(args)))

    njob, nomjob, mode = args
    # defaults
    etat  = '_'
    diag  = '_'
    node  = '_'
    tcpu  = '_'
    wrk   = '_'
    queue = '_'
    psout = ''
    on_machref = run.get('rep_agla', 'local') != 'local'
    # the real job id may differ
    jobid = str(njob)
    # regular expression matching exactly this job number : the lookahead
    # rejects a longer number which only starts with the same digits
    # (job 12 must not match the line of job 123)
    job_re = re.escape(str(njob)) + r'(?!\d)'

    # 1. get information about the job
    # 1.1. batch mode
    if mode == "batch":
        m = 'b'
        # optional script sourced before each scheduler command
        ini = ''
        if run.get('batch_ini', '') != '':
            ini = '. %s ; ' % run['batch_ini']
        # for LSF
        if run['batch_nom'] == 'LSF':
            cmd = ini + '%s -uall' % run['batch_job']
            jret, out = run.Shell(cmd)
            mat = re.search(r'(^ *%s.*)' % job_re, out, re.MULTILINE)
            if mat is None:
                # the scheduler does not know the job anymore
                etat = "ENDED"
            else:
                lin = mat.group(1).split()
                if len(lin) >= 3:
                    etat = lin[2]
                    # the execution host is the 6th column ; it is only read
                    # if the line is long enough
                    if etat != "PEND" and len(lin) > 5:
                        node = re.split('[*@]+', lin[5])[-1]
                if etat.find('SUSP')>-1:
                    etat = 'SUSPENDED'
        # for PBS
        elif run['batch_nom'] == 'PBS':
            cmd = ini + '%s -f %s' % (run['batch_job'], njob)
            jret, out = run.Shell(cmd)
            # real job id
            mjid = re.search(r'Job Id: *(\S+)', out, re.MULTILINE)
            if mjid is not None:
                jobid = mjid.group(1)
            # job state
            metat = re.search(r' job_state *= *(\S+)', out, re.MULTILINE)
            if metat is None:
                etat = "ENDED"
            else:
                etat = { 'R' : 'RUN', 'E'  :'RUN',
                            'W' : 'PEND', 'T' : 'PEND', 'Q' : 'PEND',
                            'S' : 'SUSPENDED', 'H' : 'SUSPENDED',
                   }.get(metat.group(1), '?')
            # queue
            mqueue = re.search(r' queue *= *(\S+)', out, re.MULTILINE)
            if mqueue is not None:
                queue = mqueue.group(1)
            # exec host
            mnode = re.search(r' exec_host *= *(\S+)/', out, re.MULTILINE)
            if mnode is not None:
                node = mnode.group(1)
        # for Sun Grid Engine
        elif run['batch_nom'] == 'SunGE':
            cmd = ini + run['batch_job']
            jret, out = run.Shell(cmd)
            run.DBG(out, all=True)
            mat = re.search(r'(^ *%s.*)' % job_re, out, re.MULTILINE)
            if mat is None:
                etat = "ENDED"
            else:
                lin = mat.group(1).split()
                if len(lin) >= 5:
                    # the 5th column holds the state letters
                    etat = lin[4]
                    if re.search('[wh]+', etat) is not None:
                        etat = "PEND"
                    elif re.search('[sST]+', etat) is not None:
                        etat = "SUSPENDED"
                    else:
                        etat = "RUN"
                    if len(lin) >= 8:
                        # the 8th column looks like 'queue@host.domain'
                        l_q = lin[7].split('@')
                        queue = l_q[0]
                        if len(l_q) > 1:
                            node  = l_q[1].split('.')[0]
                # the node on which 'ps' should be called is unknown : ask
                # the scheduler for the cpu time
                if etat == 'RUN' and node in ('', '_'):
                    cmd = ini + '%s -j %s' % (run['batch_job'], njob)
                    jret, out = run.Shell(cmd)
                    expr = re.compile('^usage *[0-9]* *: *cpu=([0-9:]+)', re.MULTILINE)
                    l_field = expr.findall(out)
                    tcpu = _sumtps(l_field)
        else:
            run.Mess(_(u'unknown batch scheduler'), '<F>_ERROR')

    # 1.2. interactive mode
    elif mode == "interactif":
        m = 'i'
        # astk profile
        pr_astk = os.path.join(run['flasheur'], '%s.p%s' % (nomjob, njob))
        # if it doesn't exist the job is ended
        etat = "ENDED"
        if os.path.isfile(pr_astk):
            prof = ASTER_PROFIL(pr_astk, run)
            node = prof['noeud'][0]

    else:
        run.Mess(_(u'unexpected mode : %s') % mode, '<F>_UNEXPECTED_VALUE')

    # 2. query the process
    if node != '_':
        jret, psout = run.Shell(run['ps_cpu'], mach=node)
        # an interactive job is running if its 'btc.<job_number>' script
        # appears in the list of processes
        if mode == "interactif" and \
                re.search(r'btc\.%s' % job_re, psout) is not None:
            etat = "RUN"

    # 3.1. the job is ended
    if etat == "ENDED":
        fdiag = os.path.join(run['flasheur'], '%s.%s%s' % (nomjob, m, njob))
        if os.path.isfile(fdiag):
            # the diagnosis is the first line of the file, '?' if it is empty
            diag = _read_text(fdiag).split(os.linesep)[0] or "?"
        if diag == '?' :
            diag = '<F>_SYSTEM'
            # try to find something in output
            fout = os.path.join(run['flasheur'], '%s.o%s' % (nomjob, njob))
            if os.path.isfile(fout):
                f = open(fout, 'r')
                try:
                    for line in f:
                        if line.find('--- DIAGNOSTIC JOB :')>-1:
                            # the diagnosis is the 5th word of the line ;
                            # a truncated line is ignored
                            words = line.split()
                            if len(words) > 4:
                                diag = words[4]
                        elif line.find('Cputime limit exceeded')>-1:
                            diag = '<F>_CPU_LIMIT_SYSTEM'
                finally:
                    f.close()
            # working directory
            wrk = get_nodepara(node, 'rep_trav', run['rep_trav'])
            # copy fort.6 to '.o'
            if node != '_':
                ftcp = get_tmpname(run, run['tmp_user'], basename='actu')
                fort6 = os.path.join(wrk, '%s.%s.fort.6.%s' % (nomjob, njob, m))
                run.Copy(ftcp, '%s:%s' % (node, fort6), niverr='SILENT')
                if os.path.isfile(ftcp):
                    txt = [os.linesep*2]
                    txt.append('='*48)
                    txt.append('===== Pas de diagnostic, recopie du fort.6 =====')
                    txt.append('='*48)
                    txt.append(_read_text(ftcp))
                    txt.append('='*48)
                    txt.append('='*48)
                    txt.append(os.linesep*2)
                    f = open(fout, 'a')
                    try:
                        f.write(os.linesep.join(txt))
                    finally:
                        f.close()

    else:
    # 3.2. job is running
        if etat in ('RUN', 'SUSPENDED'):
            # working directory
            wrk = get_nodepara(node, 'rep_trav', run['rep_trav'])
            if not on_machref or mode == 'interactif':
                wrk = get_tmpname(run, basename=mode, node=node, pid=njob)
            else:
                wrk = os.path.join(wrk, str(jobid))
        if etat == 'RUN' and tcpu == '_':
            # tcpu may have been retrieved upper
            l_tcpu = []
            for line in psout.split(os.linesep):
                # keep the processes started for this job number and mode ;
                # on the reference machine the lines containing 'rep_agla'
                # are ignored
                if re.search(r'\-num_job +%s' % job_re, line) is not None and \
                    re.search(r'\-mode +%s' % mode, line) is not None and \
                    (not on_machref or line.find(run['rep_agla'])<0):
                    # first column is the cpu time : drop the decimal part
                    # and use ':' as the only separator
                    l_tcpu.append(re.sub(r'\..*$', '', line.split()[0]).replace('-', ':'))
            if len(l_tcpu)>0:
                tcpu = _sumtps(l_tcpu)

    # 4. return the result
    if node == "":
        node = "_"
    return etat, diag, node, tcpu, wrk, queue
    

def Actu(run, *args):
    """Print on one line the state, diagnosis, execution node, cpu time,
    working directory and queue of a job, as returned by `Func_actu`.
    """
    template = "ETAT=%s DIAG=%s EXEC=%s TCPU=%s REP_TRAV=%s QUEUE=%s"
    values = Func_actu(run, *args)
    print3(template % values)


def Edit(run, *args):
    """Open a file of a job in an editor, or print it to stdout.

    run  : ASTER_RUN object which manages the execution
    args : exactly five values (job_number, job_name, mode, type, display).
           `type` selects the file of the 'flasheur' directory : 'output',
           'error', 'export', 'script' or 'diag'.

    If the 'result_to_output' option is set the content of the file is
    printed, else the editor is started in background on the given display.
    `run.Sortie(4)` is called if the file does not exist.
    """
    if len(args) != 5:
        run.parser.error(_(u"'--%s' takes exactly %d arguments (%d given)") % \
            (run.current_action, 5, len(args)))

    njob, nomjob, mode, typ, displ = args
    # letter used in the file name for each type ; for 'diag' it is the
    # first letter of the mode. An unknown type gives 'X'.
    d_ext = { 'output' : 'o', 'error' : 'e', 'export' : 'p',
             'script' : 'u', 'diag'  : mode[0] }
    ext = d_ext.get(typ, 'X')

    # get editor command line
    edit = run.get('editor')
    # backwards compatibility
    if not edit:
        edit = run['editeur']

    # filename to edit
    fich = os.path.join(run['flasheur'], '%s.%s%s' % (nomjob, ext, njob))
    run.DBG('filename =', fich)

    if not os.path.isfile(fich):
        run.Sortie(4)
    elif run.get('result_to_output'):
        # write file content to stdout
        run.PrintExitCode = False
        f = open(fich, "r")
        try:
            content = f.read()
        finally:
            f.close()
        print3(content)
    else:
        # edit the file : '@D' in the editor command is replaced by the
        # display, else '-display' is added for a non local display unless
        # the command already contains 'display'
        if '@D' in edit:
            edit = edit.replace('@D', displ)
        elif edit.find('display') < 0 and not is_localdisplay(displ):
            edit = '%s -display %s' % (edit, displ)

        cmd = '%s %s' % (edit, fich)
        run.Shell(cmd, bg=True)


def Del(run, *args, **kwargs):
    """Kill a job and delete related files.

    run    : ASTER_RUN object which manages the execution
    args   : (job_number, job_name, mode) optionally followed by the node
    kwargs : 'signal' overrides the signal name read from run['signal']

    A batch job which must be killed is cancelled through the batch
    scheduler ('batch_kil' command). In the other cases the process of the
    job is searched in the output of the 'ps_pid' command on the execution
    node and the signal is sent to it. The files of the job in the
    'flasheur' directory are deleted only if the signal is 'KILL'.
    """
    if len(args) < 3:
        run.parser.error(_(u"'--%s' takes at least %d arguments (%d given)") % \
            (run.current_action, 3, len(args)))
    elif len(args) > 4:
        run.parser.error(_(u"'--%s' takes at most %d arguments (%d given)") % \
            (run.current_action, 4, len(args)))

    # 0. arguments
    njob, nomjob, mode = args[:3]
    user_node = ''
    if len(args) > 3:
        user_node = args[3]
    sent_signal = run['signal']
    if kwargs.get('signal'):
        sent_signal = kwargs['signal']

    use_batch_cmd = (mode == "batch" and sent_signal == 'KILL')

    # 1. retrieve the job status
    etat, diag, node, tcpu, wrk, queue = Func_actu(run, njob, nomjob, mode)
    run.DBG(u"actu returns : etat/diag/node/tcpu/wrk/queue", (etat, diag, node, tcpu, wrk, queue))
    # the node given on the command line is used only if the execution
    # node could not be determined
    if node in ('', '_') and user_node:
        node = user_node

    # 2. send the signal
    if etat in ('RUN', 'SUSPENDED', 'PEND'):
        if use_batch_cmd:
            if run['batch_nom'] in ('LSF', 'PBS', 'SunGE'):
                cmd = ''
                if run.get('batch_ini', '') != '':
                    cmd = '. %s ; ' % run['batch_ini']
                cmd += '%s %s' % (run['batch_kil'], njob)
                iret, out = run.Shell(cmd)
                if iret != 0:
                    run.Sortie(4)
            else:
                run.Mess(_(u'unknown batch scheduler'), '<F>_ERROR')
        else:
            numpr, psout = None, ''
            # get process id
            if node != '_':
                jret, psout = run.Shell(run['ps_pid'], mach=node)
                # the lookahead rejects a longer job number which only
                # starts with the same digits, so that the signal can not
                # be sent to the process of another job
                exp = re.compile(r'^ *([0-9]+) +(.*)\-num_job +%s(?!\d).*\-mode +%s'
                        % (re.escape(str(njob)), mode), re.MULTILINE)
                res = exp.findall(psout)
                res.reverse()  # the relevant process should be the last one
                run.DBG(u"processes :", res)
                for numj, pcmd in res:
                    # "sh -c" is automatically added by os.system
                    if pcmd.find(shell_cmd) < 0 and pcmd.find("sh -c") < 0:
                        numpr = int(numj)
                        run.DBG(u"Signal will be sent to process : %s" % numpr)
                        break
            if numpr is not None:
                if is_localhost(node):
                    os.kill(numpr, getattr(signal, 'SIG%s' % sent_signal))
                else:
                    run.Shell('kill -%s %s' % (sent_signal, numpr), mach=node)
            else:
                run.DBG(u'<job.Del> process not found :' , psout, u'node = %s' % node, all=True)

    # 3. delete files
    if sent_signal == 'KILL':
        l_fich = glob.glob(os.path.join(run['flasheur'], '%s.?%s' % (nomjob, njob)))
        for f in l_fich:
            run.Delete(f)

    


def Func_tail(run, njob, nomjob, mode, nbline, expression=None):
    """Return the state and diagnosis of a job and an extract of its fort.6.

    The extract is made of the lines matching `expression` (using 'egrep')
    if it is given and not blank, else of the `nbline` last lines (using
    'tail'). The fort.6 file is only read when the job is running ; it is
    searched in the working directory returned by `Func_actu`, on the
    execution node if it is known.

    Returns the tuple (etat, diag, text).
    """
    if expression is not None and expression.strip() != "":
        command = 'egrep -- \'%s\' ' % expression
    else:
        command = 'tail -%s ' % nbline

    # retrieve the job status
    state, diagnosis, host, cputime, workdir, queue_name = \
        Func_actu(run, njob, nomjob, mode)

    # text returned when the file can not be read
    text = ''
    if mode == 'batch' and run['batch_nom'] == 'SunGE':
        text = _(u"Sorry I don't know how to ask Sun Grid Engine batch " \
                "scheduler the running node.")

    if state != 'RUN':
        return state, diagnosis, text

    # file to parse
    target = os.path.join(workdir, 'fort.6')
    run.DBG(ufmt(u'path to fort.6 : %s', target))
    machine = ''
    if host != '_':
        machine = host
        target = '%s:%s' % (host, target)

    # execute command
    if run.Exists(target):
        jret, text = run.Shell(command + run.PathOnly(target), mach=machine)
    return state, diagnosis, text



def Tail(run, *args):
    """Output the last part of fort.6 file or filter lines matching a pattern.

    run  : ASTER_RUN object which manages the execution
    args : (job_number, job_name, mode, fdest, nb_lines) optionally followed
           by a regular expression

    A line 'JOB=... JOBID=... ETAT=... DIAG=...' is printed. If the job is
    not running `run.Sortie(4)` is called, else the extract returned by
    `Func_tail` is written into `fdest` (through a temporary file and
    `run.Copy` if `fdest` is remote).
    """
    if len(args) < 5:
        run.parser.error(_(u"'--%s' takes at least %d arguments (%d given)") % \
            (run.current_action, 5, len(args)))
    elif len(args) > 6:
        run.parser.error(_(u"'--%s' takes at most %d arguments (%d given)") % \
            (run.current_action, 6, len(args)))

    # arguments
    njob, nomjob, mode, fdest, nbline = args[:5]
    expression = None
    if len(args) > 5:
        expression = args[5]

    etat, diag, s_out = Func_tail(run, njob, nomjob, mode, nbline, expression)
    if s_out == "":
        run.Mess(_(u'file is empty'))

    print3("JOB=%s JOBID=%s ETAT=%s DIAG=%s" % (nomjob, njob, etat, diag))
    # exit if job isn't running
    if etat != 'RUN':
        run.Sortie(4)

    # send output file
    remote = run.IsRemote(fdest)
    if remote:
        fname = get_tmpname(run, run['tmp_user'], basename='tail')
    else:
        fdest = run.PathOnly(fdest)
        fname = fdest
    f = open(fname, 'w')
    try:
        f.write(convert(s_out))
    finally:
        # the file must be closed (so flushed) before it is copied
        f.close()
    if remote:
        run.Copy(fdest, fname)


def Purge(run, *args):
    """Delete files of the jobs which are NOT listed in args.

    run  : ASTER_RUN object which manages the execution
    args : at least one job number (integer values)

    Every entry of the 'flasheur' directory is deleted, except those whose
    name ends with '.' + an optional lowercase letter + one of the given job
    numbers.
    """
    if len(args) < 1:
        run.parser.error(_(u"'--%s' takes at least %d arguments (%d given)") % \
            (run.current_action, 1, len(args)))

    # 0. arguments
    l_job = []
    for j in args:
        try:
            l_job.append(int(j))
        except ValueError:
            run.parser.error(_(u'incorrect job number : %s') % j)

    # 1. action
    # a single expression matches the files of all the jobs to keep
    keep = None
    if l_job:
        numbers = '|'.join(['%d' % j for j in l_job])
        keep = re.compile(r'\.[a-z]?(?:%s)$' % numbers)
    flasheur = run['flasheur']
    for f in os.listdir(flasheur):
        if keep is None or keep.search(f) is None:
            run.Delete(os.path.join(flasheur, f), verbose=True)


def parse_actu_result(txt):
    """Decode the line printed by the Actu function.

    Returns the 6 values (etat, diag, exec, tcpu, rep_trav, queue) found in
    `txt`, or a tuple of six '_' if the line is not found.
    """
    found = re.search(
        "ETAT=(.+) +DIAG=(.+) +EXEC=(.+) +TCPU=(.+) +REP_TRAV=(.+) +QUEUE=(.+)",
        txt)
    if found is None:
        return ("_",) * 6
    return found.groups()


def parse_tail_result(txt):
    """Decode the line printed by the Tail function.

    Returns the 4 values (job, jobid, etat, diag) found in `txt`, or a tuple
    of four '_' if the line is not found.
    """
    found = re.search("JOB=(.+) JOBID=(.+) ETAT=(.+) DIAG=(.+)", txt)
    if found is None:
        return ("_",) * 4
    return found.groups()

