#!/usr/bin/env python


import re
import urllib2

from lib.common import Common


class Google(Common):
    """
    This class defines methods used to perform Google dorking (command
    line option '-g <google dork>').

    The methods are meant to be called in this order: getCookie(),
    search() and then getTestableHosts().

    @author: Bernardo Damele
    """

    def __init__(self, proxyHandler):
        """
        Build the URL opener used for every request sent to Google.

        @param proxyHandler: handler passed to urllib2.build_opener()
        """

        self.opener = urllib2.build_opener(proxyHandler)

        # Defined up front so that calling the methods out of order
        # does not fail with an AttributeError
        self.googleCookie = None
        self.matches = []


    def __parsePage(self, page):
        """
        Parse Google dork search results page to get the list of
        HTTP addresses.

        @param page: content of the search results page
        @return: list of the addresses captured from the anchors
        carrying 'class=l'
        @raise Exception: if the page can not be parsed
        """

        # Raw string holding the very same pattern that was previously
        # spelled with octal escapes (\076 is '>', \074 is '<' and
        # \042 is '"').
        # NOTE(review): the character class has neither '&' nor '-',
        # hence addresses containing them are not captured - confirm
        # that this is intended
        regExpr = r'><a href="([\w\:\.\/\$\%\?\=\;\ ]+)" class=l'

        try:
            matches = re.findall(regExpr, page, re.I | re.M)
        except Exception:
            errMsg = "unable to parse Google results page"
            raise Exception(errMsg)

        return matches


    def getTestableHosts(self):
        """
        This method returns the hosts with parameters out of your
        Google dork search results, that is the addresses stored by
        search() having at least one character after a '?'.

        @return: tuple of unique addresses, in no particular order;
        empty if search() has not stored any result
        """

        testableHosts = [match for match in self.matches
                         if re.search(r"(.*)\?(.+)", match, re.I)]

        # The set drops the duplicated addresses
        return tuple(set(testableHosts))


    def getCookie(self):
        """
        This method is the first to be called when initializing a
        Google dorking object through this library. It is used to
        retrieve the Google session cookie needed to perform the
        further search.

        @return: regular expression match object whose first group is
        the leading token of the 'Set-Cookie' header, None if the
        response carries no usable 'Set-Cookie' header
        @raise Exception: if Google can not be reached
        """

        try:
            conn = self.opener.open("http://www.google.com/ncr")
            headers = conn.info()
        except urllib2.HTTPError as e:
            # An HTTP error response still carries its headers
            headers = e.info()
        except Exception:
            errMsg = "unable to connect to Google"
            raise Exception(errMsg)

        # A missing header is handled as an empty one rather than being
        # turned into the literal string "None", which the pattern
        # below would match and pass off as a cookie
        googleSetCookie = str(headers.getheader("set-cookie") or "")
        self.googleCookie = re.search(r"([\w\.\:\=]+)", googleSetCookie, re.I)

        return self.googleCookie


    def search(self, googleDork):
        """
        This method performs the effective search on Google providing
        the google dork and the Google session cookie.

        @param googleDork: Google dork expression to search for
        @return: list of addresses parsed out of the results page
        (also stored in self.matches), None if no dork is provided
        @raise Exception: if Google can not be reached or the results
        page can not be parsed
        """

        if not googleDork:
            return None

        # encodeParams() is not defined in this class, presumably it is
        # inherited from Common
        url  = "http://www.google.com/search?"
        url += "q=%s&" % self.encodeParams(googleDork)
        url += "num=100&hl=en&safe=off&filter=0&btnG=Search"

        # getCookie() stores a regular expression match object while
        # the header value has to be the matched text
        cookie = self.googleCookie

        if hasattr(cookie, "group"):
            cookie = cookie.group(1)

        if cookie:
            requestHeaders = [("Cookie", cookie)]
        else:
            requestHeaders = []

        try:
            # This assignment replaces the default headers of the opener
            self.opener.addheaders = requestHeaders
            conn = self.opener.open(url)
            page = conn.read()
        except urllib2.HTTPError as e:
            # The body of an HTTP error response is parsed all the same
            page = e.read()
        except Exception:
            errMsg = "unable to connect to Google"
            raise Exception(errMsg)

        self.matches = self.__parsePage(page)

        return self.matches

