# ubuntuone.syncdaemon.local_rescan - local rescanning
#
# Author: Facundo Batista <facundo@canonical.com>
#
# Copyright 2009 Canonical Ltd.
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 3, as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranties of
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
# PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.
'''Module that implements the Local Rescan.'''

import collections
import os
import functools
import logging
import errno
import stat

from ubuntuone.syncdaemon.filesystem_manager import get_stat
from twisted.internet import defer, reactor

class ScanTransactionDirty(Exception):
    '''Signal that the transaction of a directory scan ended up dirty.'''

class ScanNoDirectory(Exception):
    '''Signal that the directory to scan is not there anymore.'''

# logger of this module, plus one shortcut per severity (each one is the
# logger's log() method with the level already filled in)
lr_logger = logging.getLogger('ubuntuone.SyncDaemon.local_rescan')
log_debug = functools.partial(lr_logger.log, logging.DEBUG)
log_info = functools.partial(lr_logger.log, logging.INFO)
log_warning = functools.partial(lr_logger.log, logging.WARNING)
log_error = functools.partial(lr_logger.log, logging.ERROR)

class LocalRescan(object):
    '''Local re-scanner.

    Compares the real disk with FSM's metadata, and pushes the changes to EQ.
    '''
    def __init__(self, vm, fsm, eq, aq):
        self.vm = vm
        self.fsm = fsm
        self.eq = eq
        self.aq = aq
        self._queue = collections.deque()
        self._previous_deferred = None

    def start(self):
        '''Start the comparison of all the shares with "Modify" access.

        Queue one whole-tree scan (with partials checking enabled) for every
        share returned by _get_shares(), and start processing the queue.
        Shares whose path is unknown to FSM are not scanned: they are
        reported to VM through share_deleted() instead.

        Return the deferred of the queued scan, with the processing of the
        FSM trash chained after it.
        '''
        log_info("start scan all shares")
        # take a snapshot of the shares: _get_shares() is a generator that
        # walks self.vm.shares lazily, and share_deleted() is called below
        # while we are still iterating (it presumably alters that collection;
        # NOTE(review): confirm against VM)
        to_scan = list(self._get_shares())
        for share in to_scan:
            try:
                mdobj = self.fsm.get_by_path(share.path)
            except KeyError:
                # this could happen in a strange corruption situation where FSM
                # lost the share information, so we remove it, because VM will
                # download it again
                self.vm.share_deleted(share.id)
            else:
                self._queue.appendleft((share, share.path, True, mdobj.mdid))
        d = self._queue_scan()
        d.addCallback(self._process_trash)
        return d

    def _process_trash(self, _):
        '''Ask AQ to unlink every node that FSM holds in its trash.

        The argument is ignored (the method is used as a deferred callback).
        '''
        log_info("processing trash")
        for trashed in self.fsm.get_iter_trash():
            share_id, node_id, parent_id = trashed
            log_debug("unlink from trash: share_id: %s   parent_id: %s   "
                      "node_id: %s", share_id, parent_id, node_id)
            # note the order: AQ wants the parent before the node
            self.aq.unlink(share_id, parent_id, node_id)

    def _get_shares(self, access_level="Modify"):
        '''Get all the shares to compare.'''
        for sid in self.vm.shares:
            share = self.vm.shares[sid]
            if share.access_level == access_level:
                yield share

    def scan_dir(self, mdid, direct):
        '''Compare one directory between metadata and disk.

        Find the share the path belongs to, check that the path is really a
        directory in disk, and queue its scan (without partials checking).

        Return the deferred of the queued scan.  Return None, without
        scanning, if the path belongs to a "View" share or if it is not in
        disk; in the latter case LR_SCAN_ERROR is pushed for mdid a moment
        later.

        Raise ValueError if the path is not inside any share, or if it is in
        disk but it is not a directory.
        '''
        log_info("scan dir: %r  mdid: %s", direct, mdid)

        # get the share to get only a subset of mdids
        for share in self._get_shares():
            if direct.startswith(share.path):
                break
        else:
            # not in RW shares; let's check RO shares, otherwise it's an error
            for share in self._get_shares("View"):
                if direct.startswith(share.path):
                    return
            log_error("The received path is not in any share!")
            raise ValueError("The received path is not in any share!")

        # uglier than os.path.exists and isdir, but only hit the disk once
        stat_result = get_stat(direct)
        if stat_result is None:
            m = "The received path is not in disk: path %r  mdid %s"
            log_debug(m, direct, mdid)
            # it's better to delay the rescan some milliseconds, as if a
            # directory was moved, it's better to leave stuff some time to
            # settle down
            reactor.callLater(.1, self._send_scan_error, mdid)
            return
        elif not stat.S_ISDIR(stat_result.st_mode):
            m = "The path is in disk but it's not a dir: %r" % direct
            log_error(m)
            # raise with the real message (not with the literal "m")
            raise ValueError(m)

        # No, 'share' is surely defined; pylint: disable-msg=W0631
        self._queue.appendleft((share, direct, False, mdid))
        return self._queue_scan()

    def _send_scan_error(self, mdid):
        '''Sends the scan error event.'''
        self.eq.push("LR_SCAN_ERROR", mdid)

    def _queue_scan(self):
        '''If there's a scan in progress, queue the new one for later.'''
        if self._previous_deferred is None:
            self._previous_deferred = defer.Deferred()
            self._process_next_queue(None)
        return self._previous_deferred

    def _process_next_queue(self, _):
        '''Process the next item in the queue, if any.'''
        log_debug("process next in queue (len %d)", len(self._queue))
        if not self._queue:
            self._previous_deferred.callback(None)
            self._previous_deferred = None
            return

        # more to scan
        scan_info = self._queue.pop()

        def safe_scan():
            '''Scan safely'''
            try:
                self._scan_tree(*scan_info)
            # pylint: disable-msg=W0703
            except Exception, e:
                self._previous_deferred.errback(e)

        reactor.callLater(0, safe_scan)

    def _get_share_info(self, share_id, path):
        '''Get all the objects information for a share.'''
        share_info = []
        for obj in self.fsm.get_mdobjs_by_share_id(share_id, path):
            changd = self.fsm.changed(mdid=obj.mdid)
            share_info.append((obj.path, obj.is_dir, obj.stat, changd,
                               obj.node_id, obj.local_hash, obj.server_hash))
        return share_info

    def _scan_tree(self, share, path, scan_partials, mdid):
        '''Scan the received path, and queue whatever must follow.

        The directory is scanned through _scan_one_dir; the subdirectories
        it returns are queued, and then the next item of the queue is
        processed.  Return the deferred that drives all that.
        '''
        log_debug("_scan_tree:  share_path: %r  path: %r", share.path, path)

        def enqueue(target):
            '''Queue a scan of target, with the settings of this scan.'''
            self._queue.appendleft((share, target, scan_partials, mdid))

        def go_deeper(newdirs):
            '''Queue the scan of each of the subdirectories.'''
            for subdir in newdirs:
                log_debug("explore subdir: %r", subdir)
                enqueue(subdir)

        def re_launch(failure):
            '''Decide what to do with a failed scan of this directory.'''
            if failure.check(ScanTransactionDirty):
                # things changed while scanning: try the same path again
                reason = failure.getErrorMessage()
                log_debug("re queue, transaction dirty for %r, reason: %s",
                          path, reason)
                enqueue(path)
                return None

            if failure.check(OSError, IOError):
                reason = failure.getErrorMessage()
                log_debug("Disk error while scanning path %r, reason: %s",
                          path, reason)
                if self.eq.is_frozen():
                    self.eq.freeze_rollback()
                # signal the error a little later: if a directory was moved,
                # it's better to give things some time to settle down
                reactor.callLater(.1, self._send_scan_error, mdid)
                return None

            # anything else is unexpected: log it and let it through
            log_error("in the scan: %s (%s)\n%s",
                      failure.type, failure.value, failure.getTraceback())
            return failure

        d = defer.succeed((share, path, scan_partials))
        d.addCallback(self._scan_one_dir)
        d.addCallbacks(go_deeper, re_launch)
        d.addCallback(self._process_next_queue)
        return d

    def _compare(self, dirpath, dirnames, filenames, share, scan_partials):
        '''Compare the directories with the info that should be there.'''
        log_debug("comparing directory %r", dirpath)

        # get the share info
        share_info = self._get_share_info(share.id, dirpath)
        shouldbe = self._paths_filter(share_info, dirpath, len(share.path))

        def despair(message, fullname, also_children=False, also_remove=None):
            '''Something went very bad with this node, converge!'''
            log_debug(message, fullname)
            os.rename(fullname, fullname + ".u1conflict")
            self.fsm.delete_metadata(fullname)

            # if asked, remove metadata por children
            if also_children:
                log_debug("Removing also metadata from %r children", fullname)
                # pylint: disable-msg=W0612
                for path, is_dir in self.fsm.get_paths_starting_with(fullname):
                    self.fsm.delete_metadata(path)

            # if asked, remove also that file (if still exists)
            if also_remove is not None:
                try:
                    os.remove(also_remove)
                except OSError, e:
                    if e.errno != errno.ENOENT:
                        raise


        def check_stat(fullname, statinfo):
            '''Generate event if stats differ.'''
            log_debug("comp yield STAT prv: %s", statinfo)
            newstat = os.stat(fullname)
            log_debug("comp yield STAT new: %s", newstat)
            if statinfo != newstat:
                events.append(('FS_FILE_CLOSE_WRITE', fullname))

        # check all directories
        to_scan_later = []
        events = []
        for dname in dirnames:
            fullname = os.path.join(dirpath, dname)
            if dname in shouldbe:
                is_dir, statinfo, changed, _ = shouldbe.pop(dname)
                if not is_dir:
                    # it's there, but it's a file!
                    log_debug("comp yield: file %r became a dir!", fullname)
                    events.append(('FS_FILE_DELETE', fullname))
                    events.append(('FS_DIR_CREATE', fullname))
                else:
                    if changed == "SERVER":
                        # download interrupted
                        log_debug("comp yield: dir %r in SERVER", fullname)
                        mdobj = self.fsm.get_by_path(fullname)
                        self.fsm.set_by_mdid(mdobj.mdid,
                                             server_hash=mdobj.local_hash)
                        self.fsm.remove_partial(mdobj.node_id, mdobj.share_id)
                        to_scan_later.append(fullname)
                    elif changed == "NONE":
                        # it's old, we should scan it later
                        log_debug("comp yield: dir %r will be scaned later!",
                                                                      fullname)
                        to_scan_later.append(fullname)
                    else:
                        m = "Wrong 'changed' value for %r: " + changed
                        despair(m, fullname, also_children=True)

            else:
                # hey, it's new!
                log_debug("comp yield: directory %r is new!", fullname)
                events.append(('FS_DIR_CREATE', fullname))

        # check all files
        for fname in filenames:
            fullname = os.path.join(dirpath, fname)
            if fname in shouldbe:
                is_dir, statinfo, changed, _ = shouldbe.pop(fname)
                if is_dir:
                    log_debug("comp yield: dir %r became a file!", fullname)
                    # it's there, but it's a directory!
                    events.append(('FS_DIR_DELETE', fullname))
                    events.append(('FS_FILE_CREATE', fullname))
                    events.append(('FS_FILE_CLOSE_WRITE', fullname))
                else:
                    if changed == "LOCAL":
                        # upload interrupted
                        log_debug("comp yield: file %r in LOCAL state!",
                                                                    fullname)
                        events.append(('FS_FILE_CLOSE_WRITE', fullname))
                    elif changed == "NONE":
                        # what about stat info?
                        log_debug("comp yield: file %r was here.. stat?",
                                                                    fullname)
                        check_stat(fullname, statinfo)
                    elif changed == "SERVER":
                        log_debug("comp yield: file %r in SERVER", fullname)
                        mdobj = self.fsm.get_by_path(fullname)
                        self.fsm.set_by_mdid(mdobj.mdid,
                                             server_hash=mdobj.local_hash)
                        self.fsm.remove_partial(mdobj.node_id, mdobj.share_id)
                        check_stat(fullname, statinfo)
                    else:
                        m = "Wrong 'changed' value for %r: " + changed
                        despair(m, fullname)

            else:
                # This is kept as is in order to be backward compatible in the
                # first Local rescan after the upgrade to the new version with
                # partials in a separate directory
                if fname.startswith(".u1partial.") or fname == '.u1partial':
                    # a partial file! it can be a standard file, or the one
                    # inside a dir (which will be deleted in that case)
                    if fname == '.u1partial':
                        # the one in the dir
                        realname = ''
                        realfullname = dirpath
                    else:
                        realname = fname[11:]
                        realfullname = os.path.join(dirpath, realname)
                    if realname not in shouldbe:
                        # this is the case of a .partial with no md at all!
                        m = "Found a .partial (%r) with no MD, removing! (1)"
                        log_debug(m, fullname)
                        os.remove(fullname)
                        continue

                    is_dir, statinfo, changed, _ = shouldbe.pop(realname)
                    if is_dir:
                        m = ".partial of a file that MD says it's a dir: %r"
                        despair(m, realfullname, also_remove=fullname)
                    elif changed != "SERVER":
                        m = ".partial of a file that 'changed' != SERVER: %r"
                        despair(m, realfullname, also_remove=fullname)
                    else:
                        # download interrupted
                        m = "comp yield: file %r in SERVER state!"
                        log_debug(m, fullname)
                        mdobj = self.fsm.get_by_path(realfullname)
                        self.fsm.set_by_mdid(mdobj.mdid,
                                             server_hash=mdobj.local_hash)
                        self.fsm.remove_partial(mdobj.node_id, mdobj.share_id)
                        check_stat(fullname, statinfo)

                else:
                    # hey, it's new!
                    log_debug("comp yield: file %r is new!", fullname)
                    events.append(('FS_FILE_CREATE', fullname))

                    # even if it's empty, we signal to get the hash
                    # otherwise it will never get "empty" to the server
                    events.append(('FS_FILE_CLOSE_WRITE', fullname))


        if scan_partials:
            # get the partials info before sync starts creating new partials
            try:
                partials = os.listdir(self.fsm.partials_dir)
            except OSError, e:
                if e.errno == errno.ENOENT:
                    partials = set()
                else:
                    raise
        else:
            partials = []
        # now check the partials (use a copy of the partials list)
        for partial in partials:
            partial_path = os.path.join(self.fsm.partials_dir, partial)
            # a partial file! it can be a standard file, or the one
            # inside a dir (which will be deleted in that case)
            mdid, realname = partial.split('.u1partial.')
            try:
                mdobj = self.fsm.get_by_mdid(mdid)
            except KeyError:
                # this is the case of a .partial with no md at all!
                m = "Found a .partial (%r) with no MD, removing! (2)"
                log_debug(m, partial_path)
                os.remove(partial_path)
                continue
            else:
                realfullname = self.fsm.get_abspath(mdobj.share_id, mdobj.path)

            if os.path.dirname(realfullname) != dirpath:
                # if this partial isn't for the current dirpath, ignore it!
                continue
            if realfullname == dirpath:
                realname = ''

            if realname not in shouldbe:
                # this is the case of a .partial with no md at all!
                m = "Found a .partial (%r) with no MD, removing! (3)"
                despair(m, realfullname, also_remove=partial_path)
                continue

            is_dir, statinfo, changed, _ = shouldbe.pop(realname)
            if is_dir:
                m = ".partial of a file that MD says it's a dir: %r"
                despair(m, realfullname, also_remove=partial_path)
            elif changed != "SERVER":
                m = ".partial of a file that 'changed' != SERVER: %r"
                despair(m, realfullname, also_remove=partial_path)
            else:
                # download interrupted
                m = "comp yield: file %r with partial in SERVER state!"
                log_debug(m, realfullname)
                self.fsm.set_by_mdid(mdobj.mdid,
                                     server_hash=mdobj.local_hash)
                self.fsm.remove_partial(mdobj.node_id, mdobj.share_id)

        # all these don't exist anymore
        for name, (is_dir, statinfo, changed, lhash) in shouldbe.iteritems():
            fullname = os.path.join(dirpath, name)
            if is_dir:
                if changed not in ("SERVER", "NONE"):
                    # bad metadata
                    m = "Bad 'changed': removing MD from dir %r and children"
                    log_debug(m, fullname)
                    children = self.fsm.get_paths_starting_with(fullname)
                    for path, is_dir in children:
                        self.fsm.delete_metadata(path)
                    continue

                log_debug("comp yield: directory %r is gone!", fullname)
                # it's a directory, didn't have any info inside?
                base_path = fullname[len(share.path)+1:]
                to_inform = []

                # get all the info inside that dir
                for shrpath, is_dir, statinfo, _, _, _, _ in share_info:
                    if shrpath.startswith(base_path):
                        qparts = len(shrpath.split(os.path.sep))
                        to_inform.append((qparts, shrpath, is_dir))

                # order everything from more path components to less (this
                # will assure correct upgoing walk in the tree)
                to_inform.sort(reverse=True)

                # inform deletion!
                for (_, name, is_dir) in to_inform:
                    fullname = os.path.join(share.path, name)
                    if is_dir:
                        events.append(('FS_DIR_DELETE', fullname))
                    else:
                        events.append(('FS_FILE_DELETE', fullname))
            else:
                if changed not in ("SERVER", "NONE", "LOCAL"):
                    # bad metadata
                    m = "Bad 'changed': removing MD from file %r"
                    log_debug(m, fullname)
                    self.fsm.delete_metadata(fullname)
                    continue

                # if it had content somewhen, now is gone (otherwise it was
                # never really created in the disk)
                if lhash:
                    log_debug("comp yield: file %r is gone!", fullname)
                    events.append(('FS_FILE_DELETE', fullname))
        return events, to_scan_later

    def _paths_filter(self, shrinfo, dirpath, len_shr_path):
        '''Returns the paths that belong to this dir.'''
        # paths in shares are relative, remove the first slash
        direct = dirpath[len_shr_path + 1:]
        basedir = dirpath[:len_shr_path]

        # build the dict
        filesdirs = {}
        for fpath, is_dir, statinfo, changed, node_id, lhash, shash in shrinfo:
            base, fname = os.path.split(fpath)
            if base == direct:
                # if without node_id, remove the metadata, and take it as new
                if node_id is None:
                    fullname = os.path.join(basedir, fpath)
                    m = "Deleting metadata, because of node_id=None, of %r"
                    log_debug(m, fullname)
                    self.fsm.delete_metadata(fullname)

                # if both hashes aren't set in a file, it's a non-content
                # situation, remove the metadata
                elif not is_dir and not lhash and not shash:
                    fullname = os.path.join(basedir, fpath)
                    m = "Deleting metadata, both hashes empty, of %r"
                    log_debug(m, fullname)
                    self.fsm.delete_metadata(fullname)

                    # also set the parent hashes to "", to force a new scan
                    parent = os.path.dirname(fullname)
                    log_debug("Dirtying the parent hashes, path: %r", parent)
                    self.fsm.set_by_path(parent, server_hash="", local_hash="")

                else:
                    filesdirs[fname] = is_dir, statinfo, changed, lhash
        return filesdirs

    def _scan_one_dir(self, scan_info):
        '''Compare the content of one directory with what FSM knows.

        scan_info is a tuple (share, directory path, scan partials flag).
        The directory gets an inotify watch if it didn't have one, and the
        comparison is done between EQ's freeze_begin and freeze_commit.

        Return a deferred that is fired with the subdirectories to scan
        later, or that fails with ScanTransactionDirty if the commit reported
        the transaction as dirty.
        '''
        share, dirpath, partials = scan_info

        if not self.eq.inotify_has_watch(dirpath):
            log_debug("Adding watch to %r", dirpath)
            self.eq.inotify_add_watch(dirpath)
        else:
            log_debug("Path already has watch: %r", dirpath)

        # filled by scan(), returned by control() if everything went ok
        subdirs_for_later = []
        self.eq.freeze_begin(dirpath)

        def scan():
            '''List the directory and compare it, returning the events.'''
            log_debug("scanning the dir %r", dirpath)

            # get the info from disk, separating directories from files
            dnames, fnames = [], []
            for entry in os.listdir(dirpath):
                fullname = os.path.join(dirpath, entry)
                stat_result = get_stat(fullname)
                if stat_result is None:
                    continue

                mode = stat_result.st_mode
                if stat.S_ISLNK(mode):
                    # symlinks are skipped
                    continue
                if stat.S_ISDIR(mode):
                    dnames.append(entry)
                elif stat.S_ISREG(mode):
                    fnames.append(entry)
                else:
                    log_warning("Path: %r isn't a dir, file or symlink.",
                                fullname)

            compared = self._compare(dirpath, dnames, fnames, share, partials)
            events, to_scan_later = compared
            subdirs_for_later.extend(to_scan_later)
            return events

        def control(dirty):
            '''Check the result of the commit.'''
            if not dirty:
                return subdirs_for_later
            self.eq.freeze_rollback()
            raise ScanTransactionDirty("dirty!")

        d = defer.execute(scan)
        d.addCallback(self.eq.freeze_commit)
        d.addCallback(control)
        return d
