###########################################################################
# clive, video extraction utility
# Copyright (C) 2007 Toni Gundogdu
#
# clive is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
###########################################################################

import os.path
import urllib2
import sys
import time

from cStringIO import StringIO
from BaseHTTPServer import BaseHTTPRequestHandler

from clive.net import *
from clive.util import *
from clive.parse import *
from clive.opts import *


__all__ = ['Extract']


class Extract:
  def __init__(self, onsay, onscan, onread, onprompt):
    """
    Store the UI callbacks and parse the program options.

    onsay    -- called with a message string to display
    onscan   -- called with a status string while scanning
    onread   -- passed to HTTPGet as the progress hook when downloading
    onprompt -- called with a question string, returns the answer string
    """
    self._onsay, self._onscan = onsay, onscan
    self._onread, self._onprompt = onread, onprompt

    self.optparser = Options()
    parsed = self.optparser.parse()
    (self.opts, self.args, self.conf) = parsed

    # Proxy settings in the form handed over to HTTPGet
    opts = self.opts
    self.proxyconf = (
      opts.use_proxy, opts.proxy, opts.proxy_user, opts.proxy_pass
    )

    # (cookie file exists?, path to the cookie file)
    cookiefile = self.optparser.cookiefile
    self.cookieconf = (os.path.exists(cookiefile), cookiefile)

  def scan(self):
    """
    Scan for videos at specified URL.
    Returns a list containing a dictionary for each
    found video of three elements (xurl,file,size).
    """
    urls = self._checkarg()
    vurls = []
    self._onscan('please wait...')

    for url in urls:
      cn = self.newhttpget(url,hook=None,gzip=True)

      # Check URL for embedded video URLs
      if not is_direct_vurl(url):
        if is_embed_url(url):
          cn.get(url)
          eurls = self._parseembeds(cn.data)
          for eurl in eurls: # copy-append
            self._chkappend(vurls,eurl)
          continue
      # Either direct video link or non-embed URL
      self._chkappend(vurls,url)

    # Visit each video page URL for title,length
    videos = [] # Holds video info (url,file,size)
    ignored = []
    i = 1

    for vurl in vurls:
      try:
        self._onscan('checking %d/%d...' % (i,len(vurls)))

        cn = self.newhttpget(vurl,hook=None,gzip=True)

        if not is_direct_vurl(vurl):
          cn.get(vurl)
          pp = PageParser()
          (xurl,filename) = pp.parse(cn,vurl,self.opts)
        else:
          xurl = vurl
          filename = parse_fname_direct(vurl, self.opts, self._onsay)
          if not filename:
            raise CliveError('error : invalid direct video url')

        length = cn.reqlength(xurl)

        if length == 0:
          raise CliveError('warn : ignored %s ' \
            '(zero file length)' % filename)
            
        videos.append(
          {'url':xurl,'file':filename,'size':length,'owrite':None}
        )

      except CliveError,reason:
        ignored.append({'url':vurl,'reason':reason.text})

      except urllib2.URLError, x:
        reason = repr(x)
        if hasattr(x, 'reason'):
          (code,text) = x.reason
          reason = '%d: %s' % (code,text)
        elif hasattr(x, 'code'):
          (s,l) = BaseHTTPRequestHandler.responses[x.code]
          reason = '%d: %s (%s)' % (x.code, s, l)
        ignored.append({'url':vurl,'reason':reason})

      i += 1

    self._onscan('found %d video(s)' % len(videos))
    return (videos,ignored)

  def extract(self, video):
    """
    Extract the specified video.

    video is one of the dictionaries returned by scan() in the videos
    list (keys 'url', 'file', 'size' and 'owrite'). If 'owrite' is still
    None it is decided here and stored back into the dictionary:
      --skip-existing : write only if the file does not exist yet
      opts.newt set   : always write, no prompt
      otherwise       : as decided by self._checkoverwrite()

    The file is downloaded only when 'owrite' is true. Re-encoding and
    playing are attempted in both cases.
    """
    saveas = video['file']
    size = byteshuman(video['size'])

    if self.opts.prefix:
      saveas = pathjoin(self.opts.prefix,saveas)

    if video['owrite'] is None:
      if self.opts.skip_existing:
        video['owrite'] = not os.path.exists(saveas)
      elif self.opts.newt:
        video['owrite'] = True
      else:
        video['owrite'] = self._checkoverwrite(saveas)

    name = os.path.basename(saveas)

    if video['owrite']:
      self._onsay('get : %s (%s)' % (name,size), showalways=True)
      cn = self.newhttpget(video['url'],hook=self._onread,gzip=False)
      cn.get(video['url'], saveas)
    else:
      self._onsay('skip : %s (%s)' % (name,size), showalways=True)

    self._reencode(saveas)
    self._play(saveas)

  def newhttpget(self, url, hook, gzip):
    """
    Create and return a new HTTPGet instance.

    url  -- URL the connection is meant for, used only to decide
            whether cookies are enabled
    hook -- progress hook passed on to HTTPGet
    gzip -- gzip flag passed on to HTTPGet
    """
    # Start from the preconfigured cookie settings
    cookies = self.cookieconf

    # Cookies are used only if 'youtube.com' appears in the URL
    if url and 'youtube.com' not in url.lower():
      cookies = (False,None)

    settings = {
      'proxy': self.proxyconf,
      'agent': self.opts.agent,
      'progresshook': hook,
      'cookies': cookies,
      'gzip': gzip,
    }
    return HTTPGet(**settings)

  def getcookiefile(self):
    """Return a path to the cookie file"""
    return self.optparser.cookiefile

  # Non-public

  def _checkarg(self):
    """
    Return the list of URLs to scan.

    With a command line argument: if the first argument is an existing
    path it is read as a batch file, otherwise it is taken as a single
    URL. Without arguments the URLs are read from stdin, one per line,
    and duplicates are removed.
    """
    if self.args:
      first = self.args[0]
      if os.path.exists(first):
        return readbatchfile(first)
      return [normalize_url(first)]

    # No arguments, assume stdin
    lines = StringIO(sys.stdin.read()).readlines()
    urls = []
    for ln in lines:
      urls.append(normalize_url(ln.rstrip('\r\n')))
    return list_rmdup(urls)

  def _parseembeds(self, html):
    """
    Feed the HTML to an EmbedParser and return the video URLs
    it collected (parser.vurls).
    """
    markup = StringIO(html).read()
    ep = EmbedParser()
    ep.feed(markup)
    ep.close()
    return ep.vurls

  def _chkappend(self, vurls, vurl):
    """Check if video URL exists in the list already"""
    for v in vurls:
      if v == vurl:
        return
    vurls.append(vurl)

  def _checkoverwrite(self, filename):
    """Check if file exists already"""
    o = self.opts
    if os.path.exists(filename) and not o.overwrite:
      a = self._onprompt('overwrite existing file (y/N):')
      if a.lower() != 'y':
        return False
      else:
        os.remove(filename)
    return True        

  def _reencode(self, saveas, format=None):
    """
    Re-encode an extracted video to another format with a specified
    encoder program (e.g. ffmpeg).
    """
    if not self.opts.encoder:
      # The path to the encoder was previous unspecified
      return

    if not format:
      if self.opts.mpeg: self._reencode(saveas, 'mpeg')
      if self.opts.avi: self._reencode(saveas,  'avi')
      if self.opts.flv: self._reencode(saveas,  'flv')
    else:
      if saveas.endswith('.'+format):
        self._onsay('info : ignored --%s (same as source)' % format)
        return

      if self.opts.encoder.find('%i') == -1:
        self._onsay('warn : ignored --%s, check --encoder, ' \
                    '%%i not found' % format)
        return

      if self.opts.encoder.find('%o') == -1:
        self._onsay('warn : ignored --%s, check --encoder, ' \
                    '%%o not found' % format)
        return
    
      # Check for executable
      (path_exe,args) = self._checkbinary(self.opts.encoder)
      (ffmt,fout,ferr) = self._stdfilenames(saveas,format)
      (path,filename) = os.path.split(saveas)

      self._onsay('info : %s -> %s' % (filename,format))
      cmd = self.opts.encoder.replace('%i','"%s"' % saveas)
      cmd = cmd.replace('%o','"%s"' % ffmt)
      (fin,fout,ferr) = os.popen3(cmd)

      self._checkencoder(ferr)

  def _play(self, saveas):
    """Play video"""
    if self.opts.play:
      if not self.opts.player:
        self._onsay('warn : ignored --play, use --player')
        return
      else:
        if self.opts.player.find('%i') == -1:
          self._onsay('warn : ignored --play, check --player, %i not found')
          return

        (path_exe,args) = self._checkbinary(self.opts.player)

        format = None
        videofile = None

        if self.opts.play_format and self.opts.encoder:
          if self.opts.play_format == 'mpeg' and self.opts.mpeg:
            format = 'mpeg'
          elif self.opts.play_format == 'avi' and self.opts.avi:
            format = 'avi'
          elif self.opts.play_format == 'flv' and self.opts.flv:
            format = 'flv'
          else:
            self._onsay('warn : ignored --play-format, invalid format')
            return

        if format and not saveas.endswith('.%s' % format):
          (videofile,fout,ferr) = self._stdfilenames(saveas,format)

        if not videofile:
          videofile = saveas

        self._onsay('play : %s' % videofile)

        cmd = self.opts.player.replace('%i','"%s"' % videofile)
        #cmd = '"%s" %s "%s"' % (path_exe, args, saveas)
        (fin,fout,ferr) = os.popen3(cmd)

        self._checkplayer(fout,ferr)

  def _checkbinary(self, path):
    """
    Checks if binary executable exists.
    Look from given path, if that fails, try the paths
    in the PATH environment variable.
    """
    (path,exe,args) = self._splitpath(path)
    path_exe = os.path.join(path,exe)

    if not os.path.isfile(path_exe):
      if not self._checkenvpath(path_exe):
        raise CliveError('error : %s not found' % path_exe)

    return (path_exe,args)

  def _checkenvpath(self,bin):
    """Look for a binary from the environment variable PATHs"""
    for p in os.environ['PATH'].split(os.pathsep):
      n = os.path.join(p,bin)
      if os.path.isfile(n):
        return True
    return False

  def _splitpath(self, path):
    """Splits path to path, executable and arguments"""
    path,exe = os.path.split(path)
    exe = exe.split(None,1)

    args = ''
    if len(exe) > 1:
      args = exe[1]

    return (path,exe[0],args)

  def _stdfilenames(self, saveas, format=None):
    """Returns <saveas>[.<fmt>.stdout,.stderr] filenames"""
    if format:
      ffmt = '%s.%s' % (saveas,format)
      fout = ffmt + '.stdout'
      ferr = ffmt + '.stderr'
    else:
      ffmt = None
      fout = saveas + '.stdout'
      ferr = saveas + '.stderr'
    return (ffmt,fout,ferr)

  def _checkencoder(self, ferr):
    """Wrap encoder error checking (ignore stdout)"""
    self._checkstream(ferr,'stderr')

  def _checkplayer(self, fout, ferr):
    """Wrap player error checking"""
    self._checkstream(fout,'stdout')
    self._checkstream(ferr,'stderr')

  def _checkstream(self, stream, name):
    """Checks the streams returned by popen3()"""
    data = stream.read()

    # No data or --quiet used
    if len(data) == 0 or not self.opts.verbose:
      return False

    ld = data.lower()

    if ld.find('access_file access error') != -1:      
      # vlc prints the above error frequently, and as far as the
      # playing goes, they seem harmless and are more of an annoyance.

      # If you need to see them, comment the 'if' line above
      # and the line below
      return False

    if ld.find('error') != -1:
      # Safe to assume if 'error' appears in the stdout/stderr
      # an error must have occurred
      a = self._onprompt('error : show %s? (Y/n):' % name)
      if not a.lower() == 'n':
        self._onsay('-'*70)
        v = self.opts.strip
        self.opts.strip = False # Disable strip temporarily
        self._onsay(data)
        self.opts.strip = v
        self._onsay('--'*70)
        return True

    return False
