# -*- coding: ascii -*-

###########################################################################
# clive, video extraction utility
# Copyright (C) 2007-2008 Toni Gundogdu
#
# This file is part of clive.
#
# clive is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# clive is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with clive.  If not, see <http://www.gnu.org/licenses/>.
###########################################################################

## The classes for scanning fed URLs

__all__ = ['Scan']

from clive.util import transl_embed_url
from clive.unicode import tostr
from clive.modules import Modules
from clive.opts import Options

## The scanning class
class Scan:
    # Fetches each fed page, collects the links that look like supported
    # video pages, hands them to the caller-supplied callbacks for checking
    # and finally returns the videos that should be extracted.

    # Strings specific to video links. Pairs: (search-for, use-domain).
    # The search runs against the lowercased page, so 'search-for' must be
    # lowercase. 'use-domain' is prepended to the matched text.
    _LOOKUP = [
        # Youtube:
        ('/watch?v=',               'youtube.com'),
        ('youtube.com/v/',          ''),
        # GoogleVideo:
        ('/googleplayer.swf?docid=','video.google.com'),
        ('/videoplay?docid=',       'video.google.com'),
        # ...
    ]

    ## Constructor
    #
    # say       -- callable used for all output; called with a message and
    #              the optional keywords newline, flush and is_error
    # proxy     -- passed as 'proxies' to the URL grabber
    # callbacks -- sequence of exactly four callables, in this order:
    #              check_url, reset_found_urls, get_found_urls,
    #              login_if_needed
    # cache     -- object with a write() method; used only when the
    #              enable_cache option is set
    def __init__(self, say, proxy, callbacks, cache):
        self._opts = Options()._opts
        self._say = say
        self._proxy = proxy
        (self._check_url_cb, self._reset_found_urls_cb,
            self._get_found_urls_cb, self._login_if_needed_cb) = callbacks
        self._cache = cache
        (self._URLGrabber,self._URLGrabError) = Modules().getinst('urlgrabber')

    ## Scan URLs for identifiable video page links
    #
    # raw_urls -- iterable of page URLs; 'http://' is prepended to those
    #             that carry neither an http:// nor an https:// scheme
    #
    # Returns the sequence of selected videos: the user's selection when
    # the enable_confirm option is set, otherwise every video found.
    # A page that cannot be fetched is reported through say() and skipped.
    # Raises SystemExit if none of the pages yielded a video.
    def scan(self, raw_urls):
        found_videos = {} # page URL -> videos found on that page
        for url in raw_urls:
            url = self._normalize_url(url)
            self._say('scan: %s' % url)
            self._say('scanning... ', newline=0, flush=1)
            try:
                found_urls = self._extract_links(self._fetch_page(url))
                self._say(" done.\n")
                # Log-in
                self._login_if_needed_cb(found_urls)
                # Fetch video page and parse it
                self._reset_found_urls_cb()
                total = len(found_urls)
                for (i,vurl) in enumerate(found_urls):
                    # Callback stores checked URLs to nomad._found_urls
                    # using v_info dictionaries.
                    self._check_url_cb(vurl, (i,total))
                videos = self._get_found_urls_cb()
                if videos:
                    found_videos[url] = videos
                    # Write cache, done here in case user hits Cancel in
                    # the newt interface. Normally this would be done in
                    # Nomad._show_queue.
                    if self._opts.enable_cache:
                        for v in videos:
                            self._cache.write(v)
            except self._URLGrabError as err:
                self._say('%s [%s]' % (err.strerror,url), is_error=1)
        if not found_videos:
            raise SystemExit('error: nothing found')
        # Display for selection
        if self._opts.enable_confirm:
            return DisplayVideos(found_videos, self._opts).get_selection()
        # Skip selection and use the whole list
        sel = []
        for page in found_videos:
            sel.extend(found_videos[page])
        return sel

    ## Prepend 'http://' unless the URL already has an http(s) scheme
    def _normalize_url(self, url):
        lowered = url.lower()
        if lowered.startswith('http://') or lowered.startswith('https://'):
            return url
        return 'http://' + url

    ## Collect the unique video links found in the page data
    #
    # Reports progress through say(): '.' for every new link, except for
    # every 5th, which prints the number of links found so far.
    # Returns the links in the order they were first seen.
    def _extract_links(self, data):
        found_urls = []
        # Lowercase once; offsets found in the copy are applied to 'data'
        # so that the extracted link keeps its original case.
        haystack = data.lower()
        for (needle,host) in self._LOOKUP:
            offset = haystack.find(needle)
            while offset != -1:
                end = data.find('"', offset)
                if end == -1:
                    # No closing quote: the link runs to the end of data
                    end = len(data)
                lnk = host + data[offset:end]
                for e in ('&','<','\n'): # Strip trailing garbage
                    lnk = lnk.split(e,1)[0]
                # Convert embedded video URL -> video page URL
                lnk = transl_embed_url(lnk)
                # Prevent duplicate links in the list
                if lnk not in found_urls:
                    found_urls.append(lnk)
                    count = len(found_urls)
                    if count % 5:
                        s = '.'
                    else:
                        s = '%d' % count
                    self._say(s, newline=0, flush=1)
                # Continue after this link; 'end' is always past 'offset'
                offset = haystack.find(needle, end)
        return found_urls

    ## Fetch the page at url and return its (gunzipped) content
    #
    # Requests gzip encoding and decompresses the body when the response
    # says it is gzip encoded. The response is closed even if reading or
    # decompressing fails.
    def _fetch_page(self, url):
        g = self._URLGrabber(user_agent = self._opts.http_agent,
            http_headers = (('accept-encoding','gzip'),),
            throttle = self._opts.http_throttle,
            proxies = self._proxy)
        o = g.urlopen(url)
        try:
            data = o.read()
            if o.hdr.get('content-encoding') == 'gzip':
                gzip = Modules().getinst('gzip')
                StringIO = Modules().getinst('StringIO')
                data = gzip.GzipFile(fileobj=StringIO(data)).read()
        finally:
            o.close()
        return data


## The class that displays the found videos after a scan
class DisplayVideos:
    # Shows the found videos in a checkbox tree, grouped by the scanned
    # page they were found on, and records which ones the user picked.

    ## Constructor
    #
    # videos -- dictionary: scanned page URL -> sequence of video
    #           dictionaries (keys used here: 'url', 'page_title', 'length')
    # opts   -- program options (stored as-is)
    #
    # Blocks until the dialog returns a non-empty selection. "Resize" and
    # an empty "Extract" both yield an empty result, so the dialog is
    # simply shown again on a fresh screen.
    def __init__(self, videos, opts):
        self._last = None # Will hold last selection before hitting "Resize"
        self._videos = videos
        self._opts = opts
        self._newt = Modules().getinst('newt')
        while 1:
            self._selection = self._main()
            if self._selection: break

    ## Returns user selection (sequence)
    def get_selection(self):
        return self._selection

    ## Run the dialog once on a new screen and return its result
    def _main(self):
        # Create the screen outside the try block: if creation fails there
        # is nothing to finish, and the original error must not be masked
        # by an unbound 'scr' in the finally clause.
        scr = self._newt.SnackScreen()
        try:
            return self._show_videos(scr)
        finally:
            scr.finish()

    ## "Extract": return the video dictionaries of the checked URLs
    def _ok(self):
        sel = []
        for url in self._ct.getSelection():
            for page in self._videos:
                for v in self._videos[page]:
                    if v['url'] == url:
                        sel.append(v)
        return sel

    ## "Cancel": terminate the program
    def _cancel(self):
        raise SystemExit('Cancelled.')

    ## "Resize": remember the checked URLs so a redraw can restore them
    def _resize(self):
        self._last = self._ct.getSelection()
        return None

    ## Build the dialog, run it and dispatch on the pressed button
    #
    # Returns whatever the handler of the pressed button returns: the
    # selected videos for "Extract", None for "Resize". "Cancel" raises
    # SystemExit.
    def _show_videos(self, scr):
        # Floor division keeps the sizes integral (they are used as slice
        # bounds below) regardless of the Python version.
        w = scr.width - (scr.width // 6)
        ww = w - 12
        ct = self._newt.CheckboxTree(scr.height // 2, scroll=1)
        i = 0
        for page in sorted(self._videos):
            for v in self._videos[page]:
                t = v['page_title'].replace('YouTube - ','')[:w]
                try:
                    t = tostr(t)
                except UnicodeDecodeError:
                    continue # Skip it if decoding fails for some reason
                ct.append(t)
                # Recall last state: selected | unselected
                sel = bool(self._last) and v['url'] in self._last
                u = v['url'][:ww]
                u += ' (%s)' % v['length']
                # 'i' counts the titles appended so far; the item is added
                # under the title that was appended just above.
                ct.addItem(u, (i, self._newt.snackArgs['append']),
                    v['url'], selected=sel)
                i += 1
        g = self._newt.GridForm(scr, 'Scan: Videos found', 1, 2)
        g.add(ct, col=0, row=0, padding=(0,0,0,1))
        b = self._newt.ButtonBar(scr,
            [('Resize',2), ('Cancel',0), ('Extract',1)], compact=0)
        g.add(b, col=0, row=1, padding=(0,0,0,0))
        self._ct = ct # Store for self._ok
        # None is what buttonPressed gives when no button was pressed;
        # treat that like "Extract".
        d = {0:self._cancel, 1:self._ok, 2:self._resize, None:self._ok}
        return d.get(b.buttonPressed(g.runOnce()))()
