#!/usr/bin/perl -w
#######################################################
# htget
# Written by: Brandon Zehm <caspian@dotconf.net>
#
# License:
#
# htget (hereafter referred to as "program") is free software;
#   you can redistribute it and/or modify it under the terms of the GNU General
#   Public License as published by the Free Software Foundation; either version
#   2 of the License, or (at your option) any later version.
# Note that when redistributing modified versions of this source code, you
#   must ensure that this disclaimer and the above coder's names are included
#   VERBATIM in the modified code.
#
# Disclaimer:
# This program is provided with no warranty of any kind, either expressed or
#   implied.  It is the responsibility of the user (you) to fully research and
#   comprehend the usage of this program.  As with any tool, it can be misused,
#   either intentionally (you're a vandal) or unintentionally (you're a moron).
#   THE AUTHOR(S) IS(ARE) NOT RESPONSIBLE FOR ANYTHING YOU DO WITH THIS PROGRAM
#   or anything that happens because of your use (or misuse) of this program,
#   including but not limited to anything you, your lawyers, or anyone else
#   can dream up.  And now, a relevant quote directly from the GPL:
#
#                           NO WARRANTY
#
#  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
# FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
# OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
# PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
# OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
# TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
# PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
# REPAIR OR CORRECTION.
#
#   ---
#
# Whee, that was fun, wasn't it?  Now let's all get together and think happy
#   thoughts - and remember, the same class of people who made all the above
#   legal spaghetti necessary are the same ones who are stripping your rights
#   away with DVD CCA, DMCA, stupid software patents, and retarded legal
#   challenges to everything we've come to hold dear about our internet.
#   So enjoy your dwindling "freedom" while you can, because 1984 is coming
#   sooner than you think.  :[
#
#######################################################
use strict;
use Socket qw (:DEFAULT :crlf);
$| = 1;



## Global Variable(s)
my %conf = (
    "name"            => "htget",
    "version"         => "0.85",
    "debug"           => "0",
    
    "url"             => "",
    "header"          => "",
    "server"          => "",
    "port"            => "",
    "basicAuth"       => "",
);









#############################
##
##      MAIN PROGRAM
##
#############################


## Process Command Line
processCommandLine();

## Connect
connectTo($conf{'server'}, $conf{'port'});

## Download Page
my $page = getPage($conf{'url'});

## Disconnect
disconnect();

## Print results
print @$page;
quit("",0);
































#########################################################
## SUB: help
##
## hehe, for all those newbies ;)
#########################################################
sub help {
print <<EOM;

$conf{'name'}-$conf{'version'} by Brandon Zehm <caspian\@dotconf.net>

Usage:  $conf{'name'} [options]

  Required:
    --url=<url>           url to check (i.e. www.foobar.com/index.html)
  
  Optional:
    --auth=<login:pass>   use basic authentication on host
    --header              print html header in output

EOM
quit("", 1);
}







######################################################################
##  Function: initialize ()
##  
##  Does all the script startup jibberish.
##  
######################################################################
sub initialize {
  
  ## Intercept signals
  $SIG{'QUIT'}  = sub { quit("$$ - EXITING: Received SIG$_[0]", 1); };
  $SIG{'INT'}   = sub { quit("$$ - EXITING: Received SIG$_[0]", 1); };
  $SIG{'KILL'}  = sub { quit("$$ - EXITING: Received SIG$_[0]", 1); };
  $SIG{'TERM'}  = sub { quit("$$ - EXITING: Received SIG$_[0]", 1); };
  
  ## ALARM and HUP signals are not supported in Win32
  unless ($^O =~ /win/i) {
    $SIG{'HUP'}   = sub { quit("$$ - EXITING: Received SIG$_[0]", 1); };
    $SIG{'ALRM'}  = sub { quit("$$ - EXITING: Received SIG$_[0]", 1); };
  }
  
  return(1);
}









######################################################################
##  Function: processCommandLine ()
##  
##  Processes command line storing important data in global var %conf
##  
######################################################################
sub processCommandLine {
  
  
  ############################
  ##  Process command line  ##
  ############################
  
  my $numargv = @ARGV;
  my $counter = 0;
  for ($counter = 0; $counter < $numargv; $counter++) {
    if ($ARGV[$counter] =~ /^--url=/) {               ## URL  and Server ##
      $ARGV[$counter] =~ s/^--url=//;
      $conf{'url'} = $ARGV[$counter];
      
      ## Do some more parsing on the url to get the server name/port/page separated.
      $conf{'url'} =~ s/^http:\/\///;
      $conf{'server'} = "";
      while ( ($conf{'url'} =~ s/^.//) and ($& ne "/") ) {
        $conf{'server'} .= $&;
      }
      $conf{'url'} = "/" . $conf{'url'};
      
      if ($conf{'server'} =~ /\:/) {
        ($conf{'server'}, $conf{'port'}) = split(":", $conf{'server'});
      }
    }
    
    elsif ($ARGV[$counter] =~ /^--auth=/) {          ## AUTH ##
      $ARGV[$counter] =~ s/^--auth=//;
      my ($login, $pass) = split(":",$ARGV[$counter]);
      $conf{'basicAuth'} = (basicAuth($login, $pass));
    }
    
    elsif ($ARGV[$counter] =~ /^--header$/) {        ## Header ##
      $conf{'header'} = "yes";
    }
    
    elsif ($ARGV[$counter] =~ /^-h$|^--help$/) {     ## Help ##
      help();
    }
    else {                                           ## Invalid Option ##
      quit("ERROR:  The option '$ARGV[$counter]' is unrecognised.\n");
    }
  }
  
  
  
  ###################################################
  ##  Verify required variables are set correctly  ##
  ###################################################
  if (!$conf{'url'}) {
    help();
  }
  if (!$conf{'port'}) {
    $conf{'port'} = 80;
  }
 
  
  return(1);
}












######################################################################
## Function: connectTo($server, $port)
##           Assumes port is 80 if $port is blank.
##           Connects $server:$post to a global socket named
##           SERVER
##           Returns 1 on success 0 on failure.
######################################################################
sub connectTo {
  my %incoming = ();
  ## Get incoming variables
  ( 
    $incoming{'server'},
    $incoming{'port'}
  ) = @_;
  
  print "$$ - connectTo() - sub entry\n" if ($conf{'debug'} > 5);
  my $return = 1;
  
  ## Check incoming variables
  $incoming{'port'} = 80 if (!$incoming{'port'});
  quit("$$ - ERROR: connectTo() Incoming \$server variable was empty.", 1) if ($incoming{'server'} eq "");
  
  ## Open a IP socket in stream mode with tcp protocol. 
  print "$$ - connectTo() - requesting a streaming tcp/ip socket from the system\n" if ($conf{'debug'} > 5);
  socket(SERVER, PF_INET, SOCK_STREAM, getprotobyname('tcp')) || ($return = 0);
  quit("$$ - ERROR: Problem opening a tcp/ip socket with the system.", 1) if ($return == 0);
  
  ## Create the data structure $dest by calling sockaddr_in(port, 32bit IP)
  print "$$ - connectTo() - creating data structure from server name and port\n" if ($conf{'debug'} > 5);
  my $inet_aton = inet_aton($incoming{'server'})
    || quit("$$ - ERROR: Hostname lookup failed.", 1);
  my $dest = sockaddr_in ($incoming{'port'}, $inet_aton) 
    || quit("$$ - ERROR: Calling sockaddr_in() returned an error", 1);
  
  ## Connect our socket to SERVER
  print "$$ - connectTo() - connecting the socket to the server\n" if ($conf{'debug'} > 5);
  connect(SERVER, $dest) || ($return = 0);
  quit("$$ - ERROR: Connection attempt to [$incoming{'server'}:$incoming{'port'}] failed!", 1) if ($return == 0);
  print "$$ - connectTo() - successfully connected to $incoming{'server'}:$incoming{'port'}\n" if (($conf{'debug'} > 5) && ($return));
  
  ## Force our output to flush immediatly after a print.
  print "$$ - connectTo() - setting non-buffering mode on the network connection\n" if (($conf{'debug'} > 5) && ($return));
  select(SERVER);
  $| = 1;
  select(STDOUT);
  
  ## Return success
  print "$$ - connectTo() - sub exit: returning [$return]\n" if ($conf{'debug'} > 5);
  return($return);
}











######################################################################
# SUB: disconnect()
# Closes the SERVER socket.
# Returns 1 on success 0 on failure.
######################################################################
sub disconnect {
  print "$$ - disconnect() - sub entry\n" if ($conf{'debug'} > 5);
  my $return = 1;
  if (!(close SERVER)) {              ## and drop the connection.
    print "$$ - ERROR:  There was an error disconnecting form the server\n";
    $return = 0;                      ## Return failure if we didn't disconnect correctly
  } 
  print "$$ - disconnect() - disconnected from the server successfully\n" if ($conf{'debug'} > 5);
  print "$$ - disconnect() - sub exit: returning [$return]\n" if (($conf{'debug'} > 5) && ($return));
  return($return);                    ## Return
}









######################################################################
## Function: getPage($page)
##           $page is a url, it should not contain http://
##           or www.blah.com, it shoul look like this:
##
##           /site/index.html?name=value&blah=test
##      
##      
##           Returns a reference to an array of lines retrieved  
##           from the web server (i.e. the web page).
##
######################################################################
sub getPage {
  my %incoming = ();
  ## Get incoming variables
  ( 
    $incoming{'page'}
  ) = @_;
  chomp $incoming{'page'};
  
  ## Generate Request
  my $tmp = "";
  
  $tmp =  "GET $incoming{'page'} HTTP/1.0$CRLF" . 
                    "Accept: \*\/\*$CRLF" . 
                    "Host: $conf{'server'}$CRLF" .
                    "User-Agent: $conf{'name'}-$conf{'version'} (Written by Brandon Zehm)$CRLF";
  $tmp .= "Authorization: Basic $conf{'basicAuth'}$CRLF" if ($conf{'basicAuth'} ne "");
  $tmp .= "$CRLF";
  
  ## Request Page
  print SERVER $tmp;
  
  ## Get page from server
  my @tmp = ();
  
  ## Skip HTML header if we're supposed to
  unless ($conf{'header'}) {
    $tmp[0] = 'blah';
    until ($tmp[0] =~ /^$CRLF$/) {
      $tmp[0] = <SERVER>;
    } @tmp = ();
  }
  
  ## Get the rest of the data
  while (<SERVER>) { 
    push (@tmp, $_);
  }
  
  ## Return a reference to the data
  return(\@tmp);
}









######################################################################
##  Function: basicAuth ($login, $pass)
##              $login:  login name
##              $pass:   password
##            
##  Output:   Sets a global variable with the encoded pass-phrase
##            getPage() will use this variable if it is defined
##            when called.
##            
##  Example:  basicAuth ($login, $pass); getPage("/site/secure/index.html");
######################################################################
sub basicAuth {
  my %incoming = ();
  ( 
    $incoming{'login'},
    $incoming{'pass'}
  ) = @_;
  
  $conf{'basicAuth'} = "$incoming{'login'}:$incoming{'pass'}";
  
  ## Base64 encode the login and password
  my $padding = (3 - length($conf{'basicAuth'}) % 3) % 3;                     ## Set flag if binary data isn't divisible by 3
  $conf{'basicAuth'} = substr(pack('u', $conf{'basicAuth'}), 1);              ## Convert the binary to uuencoded text
  chop($conf{'basicAuth'});
  $conf{'basicAuth'} =~ tr|` -_|AA-Za-z0-9+/|;                                ## Translate from uuencode to base64
  $conf{'basicAuth'} =~ s/.{$padding}$/'=' x $padding/e if $padding;          ## Fix the end padding if flag (from above) is set
  
  
  return(1);
}









######################################################################
##  Function: quit (string $message, int $errorLevel)
##  
##  Exits the program, optionally printing $message.  It returns
##  an exit error level of $errorLevel to the system (0 means no
##  errors, and is assumed if empty.)
##
######################################################################
sub quit {
  my %incoming = ();
  (
    $incoming{'message'},
    $incoming{'errorLevel'}
  ) = @_;
  $incoming{'errorLevel'} = 0 if (!defined($incoming{'errorLevel'}));
  
  
  ## Print exit message
  if ($incoming{'message'} ne "") { 
    print "$incoming{'message'}\n";
  }
  
  ## Exit
  exit($incoming{'errorLevel'});
}






