package AmphetaDesk::WWW;
###############################################################################
# AmphetaDesk                                           (c) 2000-2002 Disobey #
# morbus@disobey.com                      http://www.disobey.com/amphetadesk/ #
###############################################################################
# ABOUT THIS PACKAGE:                                                         #
#   This package handles all of the various "get crap from the Net" info      #
#   using any scheme supported by LWP::UserAgent, including http, ftp and     #
#   file.                                                                     #
#                                                                             #
#   The internal structure still looks a lot like LWP::Simple and probably    #
#   should and will change.                                                   #
#                                                                             #
# LIST OF ROUTINES BELOW:                                                     #
#   create_ua() - set up an LWP::UserAgent object for our future 'net work.   #
#   get() - grabs all the data (minus headers) from any supported request     #
#   getstore() - saves all the data from a url into a file.                   #
#   head() - grabs all the headers (minus data) from any http request.        #
#   mirror() - stores the passed $url in $file if it has changed.             #
###############################################################################
#                  "and I pushes down the hatches like this!"                 #
###############################################################################

use strict; $|++;
use AmphetaDesk::Settings;
use AmphetaDesk::Utilities;
use LWP::UserAgent;
use HTTP::Date;
require Exporter;
use vars qw( @ISA @EXPORT @EXPORT_OK );
@ISA = qw( Exporter );
@EXPORT = qw( get getstore mirror );

# only when requested so no redefining of CGI.pm.
# this started giving annoying error messages after
# prepending all our common modules in the Templates.
@EXPORT_OK = qw( head );

###############################################################################
# create_ua() - set up an LWP::UserAgent object for our future 'net work.     #
###############################################################################
# USAGE:                                                                      #
#    $ua = create_ua();                                                       #
#                                                                             #
# NOTES:                                                                      #
#    The only reason for this function is that we want to have a single       #
#    consistent user agent for all requests to the outside world that         #
#    AmphetaDesk makes.                                                       #
#                                                                             #
# RETURNS:                                                                    #
#    $ua; an LWP::UserAgent object                                            #
###############################################################################

sub create_ua {

    # dummy object. we set our timeout to what we've specified
    # in our settings (defaulting to 10), and we turn on environment
    # checking for a proxy (located in HTTP_PROXY). we also set our
    # user agent string to ourselves, since we have an ego issue.
    my $ua = LWP::UserAgent->new; $ua->env_proxy();
    $ua->timeout(get_setting("user_request_timeout"));
    my ($app_v, $app_u, $app_o) = (get_setting("app_version"),
                      get_setting("app_url"), get_setting("app_os"));
    $ua->agent("AmphetaDesk/$app_v ($app_o; $app_u)");

    # now, check to see if a user proxy has been defined, and 
    # if so, we set that for our http/ftp protocols.
    my $user_proxy = get_setting("user_proxy_server") || undef;
    $ua->proxy(['http', 'ftp'], $user_proxy) if $user_proxy;

    return $ua;

}

###############################################################################
# get() - grabs all the data (minus headers) from any http request.           #
###############################################################################
# USAGE:                                                                      #
#    $data = get( "http://www.website.com/file.txt" );                        #
#                                                                             #
# NOTES:                                                                      #
#    This routine will take the passed $url and suck down all the data from   #
#    that $url. It also supports redirects if the $url has temporarily or     #
#    permanently moved.                                                       #
#                                                                             #
#    If you're looking to get just the http headers, use head().              #
#                                                                             #
# RETURNS:                                                                    #
#    0; if we cannot load the resource.                                       #
#    $buf; if everything was successful, return all read data.                #
###############################################################################

sub get {

   my ($url) = @_;
   my $ua = create_ua();
   my $req = HTTP::Request->new('GET', $url);

   # if we've got a proxy username and password, use then.
   my $un = get_setting("user_proxy_username") || undef;
   my $pw = get_setting("user_proxy_password") || undef;
   if ($un and $pw and get_setting("user_proxy_server")) {
      $req->proxy_authorization_basic($un, $pw); }

   # set our referer and grab our URL.
   $req->referer( get_setting("user_http_referer") );
   my $res = $ua->request($req);

   # did things go happily or sadly?
   note("Request failed. You need to set proxy authentication details.", 1) if $res->code =~ /407/;
   return 0 unless $res->code =~ /^2/;
   return $res->content;

}

###############################################################################
# getstore() - saves all the data from a url into a file.                     #
###############################################################################
# USAGE:                                                                      #
#    getstore( $url, $file );                                                 #
#                                                                             #
# NOTES:                                                                      #
#    This routine will take the passed $url, suck down all the data and save  #
#    it into $file. It will die silently if $file doesn't exist. Most of the  #
#    work of this routine is done in the &get routine.                        #
#                                                                             #
# RETURNS:                                                                    #
#    0; if the $file can't be opened.                                         #
#    0; if we couldn't get($url).                                             #
#    1; if the file data was saved correctly.                                 #
###############################################################################

sub getstore {

   my ($url, $file) = @_;
   my $data = get($url) or return 0;
   open (DATA, ">$file") or return 0;
   print DATA $data; close (DATA);
   return 1;

}

###############################################################################
# head() - grabs all the headers (minus data) from any http request.          #
###############################################################################
# USAGE:                                                                      #
#    $header_hash = head( "http://www.website.com/file.txt" );                #
#                                                                             #
# NOTES:                                                                      #
#    This routine will take the passed $url and sucks down the first couple   #
#    of bits of data. It looks through this data, creates some common http    #
#    header variables in a hash, and returns it.                              #
#                                                                             #
#    If you're looking for all the data (and no headers), use get().          #
#                                                                             #
# RETURNS:                                                                    #
#    0; if we cannot load the resource.                                       #
#    $headers; if everything was successful, return selected headers.         #
###############################################################################

sub head {

   my ($url) = @_;
   my $ua = create_ua();
   my $req = HTTP::Request->new('HEAD', $url);

   # if we've got a proxy username and password, use then.
   my $un = get_setting("user_proxy_username") || undef;
   my $pw = get_setting("user_proxy_password") || undef;
   if ($un and $pw and get_setting("user_proxy_server")) {
      $req->proxy_authorization_basic($un, $pw); }

   # set our referer and grab our URL.
   $req->referer( get_setting("user_http_referer") );
   my $res = $ua->request($req);

   # did things go happily or sadly?
   note("Request failed. You need to set proxy authentication details.", 1) if $res->code =~ /407/;
   return 0 unless $res->code =~ /^2/;

   # load in our headers.
   my $headers = $res->headers;

   return $headers;

}

###############################################################################
# mirror() - stores the passed $url in $file if it has changed.               #
###############################################################################
# USAGE:                                                                      #
#    mirror( $url, $file );                                                   #
#                                                                             #
# NOTES:                                                                      #
#    mirror() takes a look at the $url and compares the content-length with   #
#    the file size stored on the local machine. If they differ, it downloads  #
#    the file - if they don't, it returns silently. Most of the work in       #
#    mirror() is done in other routines. We also check against the last       #
#    modified time from the webserver, but since some webservers don't report #
#    this, we fall back on the file size for good quality loving.             #
#                                                                             #
# RETURNS:                                                                    #
#    0; if there were no headers from server.                                 #
#    0; if &getstore couldn't save the $file.                                 #
#    0; if the mirror didn't need to occur.                                   #
#    1; if the $file was successfully saved.                                  #
###############################################################################

sub mirror {

   my ($url, $file) = @_;

   # get the size of the $url data
   # or return if we can't get them.
   my $remote_headers = head($url);
   unless ($remote_headers) { return 0; }

   # make the logfile even larger. we only spit out the below
   # if we can't detect the two headers we're interested in.
   unless ($remote_headers->last_modified or $remote_headers->content_length) {
      note("Couldn't find a Last-Modified or Content-Length header.");
   }

   ############################################################################
   # check the modification time of the local file and url.                   #
   ############################################################################
   if (-e $file and $remote_headers->last_modified) {

      # modification time of the local file.
      my $local_http_time = (stat($file))[9] || 0;

      # turn the remote time into epoch seconds too.
      note("Remote Time: " . $remote_headers->header('Last-Modified'));
      note("Remote Epoch: " . $remote_headers->last_modified . " / Local Epoch: $local_http_time.");

      # if remote is newer, download, else return.
      if ($remote_headers->last_modified > $local_http_time) { 
         note("The remote copy has a newer modification date - downloading.");
         getstore($url, $file) or return 0; return 1; 
      }

      # blah, blah, blah! make it bigger, I say!
      note("Local copy has a newer modification date. No download needed.");
      return 0;
   }

   ############################################################################
   # if we're here, then we couldn't find a last modified time, so check      #
   # the file size of the local file and the url as a last resort.            #
   ############################################################################
   elsif (-e $file) {

      # get the remote file size.
      my $remote_size = $remote_headers->content_length;

      # if we can't figure out the remote content-length,
      # set it to 1, so we don't cause a match with the
      # default 0 for the local file size (below)
      $remote_size = 1 unless defined($remote_size);

      # get the local file size.
      my $local_size = -s $file;

      # blah, blah, blah! make it bigger, I say!
      note("Remote Size: $remote_size / Local Size: $local_size.");

      # if they don't match, download.
      if ($remote_size != $local_size) {
         note("The file sizes are different - downloading.");
         getstore($url, $file) or return 0; return 1;
      }

      note("The file sizes are the same. No download needed.");
      return 0;
   }

   ############################################################################
   # just download it.                                                        #
   ############################################################################
   else { 
      note("Our local file doesn't exist. Downloading $url to $file.");
      getstore($url, $file) or return 0; return 1;
   }

   return 1;

}

1;
