'''
MSNSpider.py

Copyright 2006 Andres Riancho

This file is part of w3af, w3af.sourceforge.net .

w3af is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

w3af is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with w3af; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

'''

import core.controllers.outputManager as om

# options
from core.data.options.option import option
from core.data.options.optionList import optionList

from core.controllers.basePlugin.baseDiscoveryPlugin import baseDiscoveryPlugin
from core.controllers.w3afException import w3afException
from core.controllers.w3afException import w3afRunOnce
from core.controllers.misc.is_private_site import is_private_site

from core.data.searchEngines.msn import msn as msn
import core.data.parsers.urlParser as urlParser

from urllib2 import URLError


class MSNSpider(baseDiscoveryPlugin):
    '''
    Search MSN to get a list of new URLs
    @author: Andres Riancho ( andres.riancho@gmail.com )
    '''

    def __init__(self):
        baseDiscoveryPlugin.__init__(self)
        self._run = True
        
        # User variables
        self._resultLimit = 300

        # Internal variables
        self._fuzzable_requests = []
        
    def discover(self, fuzzableRequest ):
        '''
        @parameter fuzzableRequest: A fuzzableRequest instance that contains
                                    (among other things) the URL to test.
        '''
        if not self._run:
            # This will remove the plugin from the discovery plugins to be runned.
            raise w3afRunOnce()
        else:
            # I will only run this one time. All calls to MSNSpider return the same url's
            self._run = False
            
            msn_obj = msn( self._urlOpener )
            
            domain = urlParser.getDomain( fuzzableRequest.getURL() )
            if is_private_site( domain ):
                msg = 'There is no point in searching MSN for "site:'+ domain + '".'
                msg += ' MSN doesnt index private pages.'
                raise w3afException( msg )

            results = msn_obj.getNResults('site:'+ domain, self._resultLimit )
        
            for res in results:
                targs = (res.URL,)
                self._tm.startFunction( target=self._gen_fuzzable_requests, 
                                        args=targs, ownerObj=self )          
            self._tm.join( self )

        return self._fuzzable_requests
    
    def _gen_fuzzable_requests( self, url ):
        '''
        GET the URL and then call createFuzzableRequests with the response.

        @parameter url: The URL to GET.
        '''
        try:
            response = self._urlOpener.GET( url, useCache=True)
        except KeyboardInterrupt, k:
            raise k
        except w3afException, w3:
            om.out.error('Exception while requesting ' + url + ' ' + str(w3) )
        except URLError, url_err:
            om.out.debug('URL Error while fetching page in MSNSpider, error: ' + str(url_err) )
        else:
            fuzzReqs = self._createFuzzableRequests( response )
            self._fuzzable_requests.extend( fuzzReqs )
    
    def getOptions( self ):
        '''
        @return: A list of option objects for this plugin.
        '''
        d2 = 'Fetch the first "resultLimit" results from the Google search'
        o2 = option('resultLimit', self._resultLimit, d2, 'integer')

        ol = optionList()
        ol.add(o2)
        return ol

    def setOptions( self, optionsMap ):
        '''
        This method sets all the options that are configured using the user interface 
        generated by the framework using the result of getOptions().
        
        @parameter OptionList: A dictionary with the options for the plugin.
        @return: No value is returned.
        ''' 
        self._resultLimit = optionsMap['resultLimit'].getValue()

    def getPluginDeps( self ):
        '''
        @return: A list with the names of the plugins that should be runned before the
        current one.
        '''
        return []

    def getLongDesc( self ):
        '''
        @return: A DETAILED description of the plugin functions and features.
        '''
        return '''
        This plugin finds new URL's in MSN search engine.
        
        One configurable parameters exist:
            - resultLimit
        
        This plugin searches MSN for : "@domain.com", requests all search results and parses them in order
        to find new mail addresses.
        '''
