#! /usr/bin/perl -w
#
#  gnump3d-top - Display entries from an Apache common log file.
#
#  GNU MP3D - A portable(ish) MP3 server.
#
# Homepage:
#   http://www.gnump3d.org/
#
# Author:
#  Steve Kemp <steve@steve.org.uk>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
#
#  Steve Kemp
#  ---
#  http://www.steve.org.uk/
#
#


use Env;
use Getopt::Long;
use gnump3d::files;
use strict;



#
###
#
#  Neat Hack - close STDERR and make all writes to it go to STDOUT,
# this way gnump3d will receive all errors if something goes wrong
# in the script execution.
#
###
#
# Make STDOUT the default output handle and switch autoflush on for it,
# then re-open STDERR as a duplicate of STDOUT.
select( STDOUT );
$| = 1;
open( STDERR, ">&STDOUT" );


# Version identifier for this script.
my $VERSION_NUMBER = '$Revision: 1.6 $';

#  The configuration file which we read if there is no --config option
# supplied on the command line.  It stays empty when none of the
# candidate files below exists.
my $DEFAULT_CONFIG = "";

#
#  Candidate configuration files, most preferred first:
#
#    1. The user's personal file (only considered when HOME is set).
#    2. The system wide file.
#    3. One in the current directory (mainly here for Windows users).
#
my @configCandidates = ();
push( @configCandidates, $ENV{"HOME"} . "/.gnump3drc" ) if ( $ENV{"HOME"} );
push( @configCandidates, "/etc/gnump3d/gnump3d.conf", "gnump3d.conf" );

foreach my $candidate ( @configCandidates )
{
    next unless ( -e $candidate );

    # The first file that exists wins.
    $DEFAULT_CONFIG = $candidate;
    last;
}


#
#  Options set by the command line arguments.
#
# Report selectors, switched on by --last, --songs, --dirs, --users,
# --logins and --agents respectively.
my ( $SHOW_LAST,  $SHOW_SONGS,  $SHOW_DIRS   ) = ( 0, 0, 0 );
my ( $SHOW_USERS, $SHOW_LOGINS, $SHOW_AGENTS ) = ( 0, 0, 0 );

# Informational flags: --help and --version.
my ( $SHOW_HELP, $SHOW_VERSION ) = ( 0, 0 );

# Output modifiers.
my $SHOW_PLAIN   = 0;    # --plain : plain text instead of HTML table rows.
my $HIDE_DEFAULT = 0;    # --hide  : leave %DEFAULT_DIRS out of the --dirs report.
my $SHOW_NUMBER  = 20;   # --count : how many entries a report shows.
my $DEBUG        = 0;    # --debug : print diagnostics while scanning the log.

# --logfiles : glob pattern naming additional logfiles; unset by default.
my $LOGFILE_PATTERN;


#
#  These are directories which are handled by plugins, and shouldn't
# be included in the main statistics.
#
# Each listed directory maps to 1.  showTopDirs() consults this table
# when --hide is in effect.
my %DEFAULT_DIRS = map { ( $_ => 1 ) } qw(
    /
    /stats/
    /prefs/
    /recent/
    /load/
    /search/
    /bug/
    /COPYING/
    /playlist/
    /now/
);
			 
			

#  Read the command line into the option variables.
&parseArguments();

#  Hand over to main(), which picks the report to produce (or prints
# the help text) and then exits by itself.
&main();

#  Nothing more to do.
exit( 0 );




#
#  Call routines to do our work, depending on the state of
# the options that we've been given.
#
sub main()
{
    #
    #  Run the first action whose flag is set and exit.  The --help and
    # --version flags are looked at before any of the reports; when no
    # flag at all is set the help text is shown.
    #
    foreach my $info ( [ $SHOW_HELP,    \&showHelp    ],
                       [ $SHOW_VERSION, \&showVersion ] )
    {
        my ( $wanted, $handler ) = @$info;
        next unless ( $wanted );

        $handler->();
        exit;
    }

    #
    #  The reports, in the order in which their flags are tested.  Each
    # handler receives the number of entries to show.
    #
    my @reports = (
        [ $SHOW_SONGS,  \&showTopSongs  ],
        [ $SHOW_LAST,   \&showLastSongs ],
        [ $SHOW_DIRS,   \&showTopDirs   ],
        [ $SHOW_USERS,  \&showTopUsers  ],
        [ $SHOW_LOGINS, \&showTopLogins ],
        [ $SHOW_AGENTS, \&showTopAgents ],
    );

    foreach my $report ( @reports )
    {
        my ( $wanted, $handler ) = @$report;
        next unless ( $wanted );

        $handler->( $SHOW_NUMBER );
        exit;
    }

    # Nothing was requested.
    &showHelp();
    exit;
}


#
# Show the top N user agents.
#
sub showTopAgents( $ )
{
    #
    #  Print the most frequently seen User-Agent strings, most frequent
    # first, as HTML table rows or (with --plain) as tab separated text.
    #
    #  Arguments: $count - the maximum number of agents to print; zero or
    #                      less prints the heading only.
    #  Returns:   nothing useful - the report is printed.
    #
    my ( $count ) = (@_);
    my %AGENTS;

    #  The agent is taken to be the final double-quoted field of a line.
    #  (The '&' call form avoids a "called too early to check prototype"
    # warning, as getLogfile() is defined further down the file.)
    my @lines = &getLogfile();
    foreach my $line ( @lines )
    {
        if ( $line =~ / \"([^\"]+)\"$/ )
        {
            $AGENTS{ $1 } ++;
        }
    }

    if ( ! $SHOW_PLAIN )
    {
        print "<tr><td><b>Count</b></td><td><b>User-Agent</b></td></tr>\n";
    }
    else
    {
        print "Count\t\t\tUser-Agent\n";
    }

    #  The agent text comes straight from the logfile, so it must not be
    # trusted as markup when we emit HTML.
    my $escapeHtml = sub
    {
        my ( $text ) = @_;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return( $text );
    };

    #  Highest count first; equal counts are ordered by name so that the
    # output is the same from one run to the next.
    my $shown = 0;
    foreach my $agent ( sort { $AGENTS{$b} <=> $AGENTS{$a} || $a cmp $b }
                        keys %AGENTS )
    {
        last if ( $shown >= $count );
        $shown++;

        if ( $SHOW_PLAIN )
        {
            print $AGENTS{$agent} . "\t\t" . $agent . "\n";
        }
        else
        {
            print "<tr><td>" . $AGENTS{$agent} . "</td><td>" .
                $escapeHtml->( $agent ) . "</td></tr>\n";
        }
    }
}


#
#  Show the top N songs, along with links to them.
#
#  Note that we take account of the 'always_stream' setting from the
# configuration file here.
#
sub showTopSongs( $ )
{
    #
    #  Print the most requested audio files, most requested first, as
    # HTML table rows containing links or (with --plain) as tab separated
    # text.  When 'always_stream' is enabled in the configuration file
    # the links get a ".m3u" suffix.
    #
    #  Arguments: $count - the maximum number of songs to print; zero or
    #                      less prints the heading only.
    #  Returns:   nothing useful - the report is printed.
    #
    my ( $count ) = (@_);
    my %SONGS;

    #  (The '&' call form avoids a "called too early to check prototype"
    # warning for subs which are defined further down the file.)
    my @lines = &getLogfile();
    foreach my $line ( @lines )
    {
        #
        # We should only receive GET's anyway .. can't hurt to be
        # paranoid.
        #
        next unless ( $line =~ /\"GET ([^\"]+)\"/ );
        my $entry = $1;

        if ( $entry =~ /\/$/ )
        {
            $DEBUG && print "Ignoring directory : $entry\n";
        }
        elsif ( $entry =~ /^\/search\?[pq]=/ )
        {
            $DEBUG && print "Ignoring search request: $entry\n";
        }
        elsif ( $entry =~ /^\/info/ )
        {
            # Skipped here for the same reason showLastSongs() skips
            # them: they are not requests for the song itself.
            $DEBUG && print "Ignoring information request: $entry\n";
        }
        elsif ( &isAudio( $entry ) )
        {
            #  Here we're avoiding non-audio files - this might
            # happen if we allow people to serve files from the
            # theme directories.
            $SONGS{ &prettifyEntry( $entry ) } ++;
        }
        else
        {
            $DEBUG && print "Ignored non-audio file : $entry\n";
        }
    }

    if ( ! $SHOW_PLAIN )
    {
        print "<tr><td><b>Count</b></td><td><b>Song</b></td></tr>\n";
    }
    else
    {
        print "Count\t\t\tSong\n";
    }

    #
    # Handle the always_stream setting.
    #
    my $suffix = &alwaysStream() ? ".m3u" : "";

    #  Song names come from the logfile and have been URL-decoded, so
    # they may contain markup characters; escape them for HTML output.
    my $escapeHtml = sub
    {
        my ( $text ) = @_;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return( $text );
    };

    #  Highest count first; equal counts are ordered by name so that the
    # output is the same from one run to the next.
    my $shown = 0;
    foreach my $song ( sort { $SONGS{$b} <=> $SONGS{$a} || $a cmp $b }
                       keys %SONGS )
    {
        last if ( $shown >= $count );
        $shown++;

        if ( $SHOW_PLAIN )
        {
            print $SONGS{$song} . "\t\t" . $song . "\n";
        }
        else
        {
            print "<tr><td>" . $SONGS{$song} . "</td><td>" .
                "<a href=\"" . $escapeHtml->( $song . $suffix ) . "\">" .
                $escapeHtml->( $song ) . "</a>" .
                "</td></tr>\n";
        }
    }
}




#
#  Show the last N songs, along with links to them.
#
#  Note that we take account of the 'always_stream' setting from the
# configuration file here.
#
sub showLastSongs( $ )
{
    #
    #  Print the most recently requested audio files, newest first,
    # together with the host that asked for each of them.  Output is HTML
    # table rows containing links, or (with --plain) tab separated text.
    # When 'always_stream' is enabled in the configuration file the links
    # get a ".m3u" suffix.
    #
    #  Arguments: $count - the maximum number of songs to print.
    #  Returns:   nothing useful - the report is printed.
    #
    my ( $count ) = (@_);

    #  Each element is [ $ip, $song ], in logfile order.  Keeping the two
    # values apart (rather than joining them into one string) means a
    # song name can contain any character without confusing the host.
    my @plays = ();

    #  (The '&' call form avoids a "called too early to check prototype"
    # warning for subs which are defined further down the file.)
    my @lines = &getLogfile();
    foreach my $line ( @lines )
    {
        #
        # We should only receive GET's anyway .. can't hurt to be
        # paranoid.  The host is the first space-delimited field.
        #
        next unless ( $line =~ /^([^ ]+) .*?\"GET ([^\"]+)\"/ );
        my ( $ip, $entry ) = ( $1, $2 );

        if ( $entry =~ /\/$/ )
        {
            $DEBUG && print "Ignoring directory : $entry\n";
        }
        elsif ( $entry =~ /^\/search\?[pq]=/ )
        {
            $DEBUG && print "Ignoring search request: $entry\n";
        }
        elsif ( $entry =~ /^\/info/ )
        {
            $DEBUG && print "Ignoring information request: $entry\n";
        }
        elsif ( &isAudio( $entry ) )
        {
            push( @plays, [ $ip, &prettifyEntry( $entry ) ] );
        }
        else
        {
            $DEBUG && print "Ignored non-audio file $entry\n";
        }
    }

    if ( $SHOW_PLAIN )
    {
        print "Host\t\tSong\n";
    }
    else
    {
        print "<tr><td><b>Host</b></td><td><b>Song</b></td></tr>\n";
    }

    #
    # Handle the always_stream setting.
    #
    my $suffix = &alwaysStream() ? ".m3u" : "";

    #  Host and song names originate outside this script (logfile, DNS),
    # so escape them before putting them into HTML.
    my $escapeHtml = sub
    {
        my ( $text ) = @_;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return( $text );
    };

    #  Remember the name of each address so that a busy client costs one
    # lookup rather than one per line shown.
    my %hostFor;

    my $shown = 0;
    foreach my $play ( reverse( @plays ) )
    {
        last if ( $shown >= $count );
        $shown++;

        my ( $ip, $song ) = @$play;
        $hostFor{$ip} = &ipToName( $ip ) unless ( exists( $hostFor{$ip} ) );
        my $host = $hostFor{$ip};

        if ( $SHOW_PLAIN )
        {
            print $host . "\t\t" . $song . "\n";
        }
        else
        {
            print "<tr><td>" . $escapeHtml->( $host ) . "</td><td><a href=\"" .
                $escapeHtml->( $song . $suffix ) . "\">" .
                $escapeHtml->( $song ) . "</a></td></tr>\n";
        }
    }
}


#
#  Show the most popular directories.
#
sub showTopDirs( $ )
{
    #
    #  Print the most requested directories (request paths which end in
    # "/"), most requested first, as HTML table rows containing links or
    # (with --plain) as tab separated text.  With --hide the directories
    # listed in %DEFAULT_DIRS are left out and do not use up the count.
    #
    #  Arguments: $count - the maximum number of directories to print;
    #                      zero or less prints the heading only.
    #  Returns:   nothing useful - the report is printed.
    #
    my ( $count ) = (@_);
    my %DIRS;

    #  (The '&' call form avoids a "called too early to check prototype"
    # warning, as getLogfile() is defined further down the file.)
    my @lines = &getLogfile();
    foreach my $line ( @lines )
    {
        next unless ( $line =~ /\"GET ([^\"]+)\"/ );
        my $entry = $1;

        if ( $entry =~ /\/$/ )
        {
            $DEBUG && print "Found dir $entry\n";
            $DIRS{ &prettifyEntry( $entry ) } ++;
        }
    }

    if ( ! $SHOW_PLAIN )
    {
        print "<tr><td><b>Count</b></td><td><b>Directory</b></td></tr>\n";
    }
    else
    {
        print "Count\t\t\tDirectory\n";
    }

    #  Directory names come from the logfile and have been URL-decoded,
    # so they may contain markup characters; escape them for HTML output.
    my $escapeHtml = sub
    {
        my ( $text ) = @_;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return( $text );
    };

    #  Highest count first; equal counts are ordered by name so that the
    # output is the same from one run to the next.
    my $shown = 0;
    foreach my $dir ( sort { $DIRS{$b} <=> $DIRS{$a} || $a cmp $b }
                      keys %DIRS )
    {
        next if ( ( $HIDE_DEFAULT ) &&
                  ( defined( $DEFAULT_DIRS{ $dir } ) ) );

        last if ( $shown >= $count );
        $shown++;

        if ( $SHOW_PLAIN )
        {
            print $DIRS{$dir} . "\t\t" . $dir . "\n";
        }
        else
        {
            my $safe = $escapeHtml->( $dir );
            print "<tr><td>" . $DIRS{$dir} . "</td><td>" .
                "<a href=\"" . $safe . "\">" . $safe . "</a>" .
                "</td></tr>\n";
        }
    }
}


#
#  Show the names/ip's of the N clients which have made the most
# requests from us
#
sub showTopUsers( $ )
{
    #
    #  Print the clients which made the most requests, busiest first,
    # each with its address, its hostname and its request count.  Output
    # is HTML table rows, or (with --plain) tab separated text.
    #
    #  Arguments: $count - the maximum number of clients to print; zero
    #                      or less prints the heading only.
    #  Returns:   nothing useful - the report is printed.
    #
    my ( $count ) = (@_);
    my %USERS;

    #  (The '&' call form avoids a "called too early to check prototype"
    # warning for subs which are defined further down the file.)
    my @lines = &getLogfile();
    foreach my $line ( @lines )
    {
        #  The client is the first field of the line.  The match is
        # anchored so that a later "something -" sequence in the line
        # can never be mistaken for it.
        if ( $line =~ /^([^ ]+) -/ )
        {
            my $entry = $1;
            $DEBUG && print "Found user $entry\n";
            $USERS{ $entry } ++;
        }
    }

    if ( ! $SHOW_PLAIN )
    {
        print "<tr><td><b>IP Address</b> (<b>Hostname</b>)</td>";
        print "<td><b>Connection Count</b></td></tr>\n";
    }
    else
    {
        print "IP Address (Hostname)\t\t\tConnection Count\n";
    }

    #  Addresses come from the logfile and hostnames from DNS; neither is
    # under our control, so escape both before emitting HTML.
    my $escapeHtml = sub
    {
        my ( $text ) = @_;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return( $text );
    };

    #  Highest count first; equal counts are ordered by address so that
    # the output is the same from one run to the next.
    my $shown = 0;
    foreach my $client ( sort { $USERS{$b} <=> $USERS{$a} || $a cmp $b }
                         keys %USERS )
    {
        last if ( $shown >= $count );
        $shown++;

        #
        #  Only get the hostnames for the clients that we're going
        # to display.
        #
        my $host = &ipToName( $client );

        if ( $SHOW_PLAIN )
        {
            print $client . " (" . $host . ")" . "\t\t" . $USERS{$client} . "\n";
        }
        else
        {
            print "<tr><td>" . $escapeHtml->( $client ) .
                " (" . $escapeHtml->( $host ) . ")" .
                "</td><td>" . $USERS{$client} . "</td></tr>\n";
        }
    }
}



#
#  Show the names of the N logged in clients which have made the most
# requests from us
#
sub showTopLogins( $ )
{
    #
    #  Print the login names which made the most requests, busiest
    # first, each with its request count.  Requests whose login field is
    # "-" are not counted.  Output is HTML table rows, or (with --plain)
    # tab separated text.
    #
    #  Arguments: $count - the maximum number of login names to print;
    #                      zero or less prints the heading only.
    #  Returns:   nothing useful - the report is printed.
    #
    my ( $count ) = (@_);
    my %LOGINS;

    #  (The '&' call form avoids a "called too early to check prototype"
    # warning, as getLogfile() is defined further down the file.)
    my @lines = &getLogfile();
    foreach my $line ( @lines )
    {
        #  The login name sits between "<host> - " and the first " [".
        #  The match is anchored and non-greedy: a greedy match would run
        # on to the last " [" of the line, which may be inside a later
        # field.
        if ( $line =~ /^([^ ]+) - (.*?) \[/ )
        {
            my $username = $2;
            if ( $username ne "-" )
            {
                $LOGINS{ $username } ++;
            }
        }
    }

    if ( ! $SHOW_PLAIN )
    {
        print "<tr><td><b>Login Name</b></td>";
        print "<td><b>Connection Count</b></td></tr>\n";
    }
    else
    {
        print "Login Name\t\t\tConnection Count\n";
    }

    #  Login names come from the logfile, so escape them for HTML output.
    my $escapeHtml = sub
    {
        my ( $text ) = @_;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return( $text );
    };

    #  Highest count first; equal counts are ordered by name so that the
    # output is the same from one run to the next.  $shown enforces the
    # requested limit.
    my $shown = 0;
    foreach my $login ( sort { $LOGINS{$b} <=> $LOGINS{$a} || $a cmp $b }
                        keys %LOGINS )
    {
        last if ( $shown >= $count );
        $shown++;

        if ( $SHOW_PLAIN )
        {
            print $login . "\t\t" . $LOGINS{$login} . "\n";
        }
        else
        {
            print "<tr><td>" . $escapeHtml->( $login ) . "</td><td>" .
                $LOGINS{$login} . "</td></tr>\n";
        }
    }
}


#
#  Is 'always_stream' enabled in the configuration file?
#
sub alwaysStream()
{
  #
  #  Look up the 'always_stream' setting in the configuration file named
  # by $DEFAULT_CONFIG.
  #
  #  Returns: the text after "always_stream =" on the last such line which
  #           is not a comment, or 0 if there is no such line.
  #  Dies:    if the configuration file cannot be opened.
  #

  #  Three-argument open: the filename is never interpreted as a mode or
  # a command, whatever characters it contains.
  open( my $config, "<", $DEFAULT_CONFIG )
      or die "Cannot open configuration file $DEFAULT_CONFIG : $!";

  my $alwaysStream = 0;

  while ( defined( my $line = <$config> ) )
  {
      next if $line =~ /^\s*#/ ;

      #  Anchored, so that only a setting which is actually called
      # 'always_stream' can match.
      if ( $line =~ /^\s*always_stream\s*=\s*(\S*)/ )
      {
          $alwaysStream = $1;
      }
  }
  close( $config );

  return( $alwaysStream );
}



#
#  Return the text of our logfile - this routine takes care of handling
# multiple files, and uncompressing any gzipped files.
#
sub getLogfile()
{
  #
  #  Read every logfile line we have been asked to look at.
  #
  #  The main logfile is the one named by the first 'logfile = ...' line
  # of the configuration file ($DEFAULT_CONFIG).  If --logfiles supplied
  # a pattern, every file matching it is read as well; names ending in
  # "gz" are read through zcat.
  #
  #  Returns: the list of lines, main logfile first.
  #  Dies:    if the configuration file has no logfile entry, or if any
  #           of the files cannot be opened.
  #

  #  Three-argument open with lexical handles throughout: filenames are
  # never interpreted as modes or commands, and the handles are private
  # to this routine.
  open( my $config, "<", $DEFAULT_CONFIG )
      or die "Cannot open configuration file $DEFAULT_CONFIG : $!";

  my $logFile = undef;

  while ( defined( my $line = <$config> ) )
  {
      next if $line =~ /^\s*#/ ;

      #  Anchored, so that only a setting which is actually called
      # 'logfile' can match.
      if ( $line =~ /^\s*logfile\s*=\s*(\S*)/ )
      {
          #
          # We're only going to care about the first
          # logfile line we find.
          #
          $logFile = $1;
          last;
      }
  }
  close( $config );

  die "Can't find logfile entry in config file" unless defined $logFile;

  open( my $main, "<", $logFile ) or die "Cannot open logfile $logFile : $!";
  my @logs = <$main>;
  close( $main );

  #
  # Now we can add in any additional logfiles we might have been
  # given.
  #
  if ( defined( $LOGFILE_PATTERN ) )
  {
      foreach my $file ( glob( $LOGFILE_PATTERN ) )
      {
          my $extra;

          # Is the logfile gzipped?
          if ( $file =~ /gz$/ )
          {
              # TODO: Will fail if there is no zcat installed.
              #
              #  List-form pipe open: zcat is run directly, without a
              # shell, so spaces or shell metacharacters in the filename
              # are harmless.
              #  NOTE(review): list-form pipe open may be unavailable on
              # some older Windows perls - confirm if that matters.
              open( $extra, "-|", "zcat", $file )
                  or die "Cannot read piped logfile - $file : $! ";
          }
          else
          {
              # Open file normally.
              open( $extra, "<", $file )
                  or die "Cannot open logfile - $file : $!";
          }

          push( @logs, <$extra> );
          close( $extra );
      }
  }

  return( @logs );
}


#
#  Parse the command line options.
#
sub parseArguments()
{
    #
    #  Read the command line into the option variables declared at the
    # top of the file.
    #
    #  --count is declared as an integer, because the reports use it as
    # a number; Getopt::Long rejects anything else with a message and
    # the default stays in force.  As before, the result of GetOptions
    # is not acted upon: after a parse error the script carries on with
    # whatever was understood.
    #
    GetOptions(
               "help"       => \$SHOW_HELP,
               "version"    => \$SHOW_VERSION,
               "songs"      => \$SHOW_SONGS,
               "last"       => \$SHOW_LAST,
               "dirs"       => \$SHOW_DIRS,
               "users"      => \$SHOW_USERS,
               "logins"     => \$SHOW_LOGINS,
               "agents"     => \$SHOW_AGENTS,
               "count=i"    => \$SHOW_NUMBER,
               "config=s"   => \$DEFAULT_CONFIG,
               "logfiles=s" => \$LOGFILE_PATTERN,
               "plain"      => \$SHOW_PLAIN,
               "hide"       => \$HIDE_DEFAULT,
               "debug"      => \$DEBUG,
               );
}


#  Show help for this script.
#
sub showHelp()
{
    #
    #  Print the version banner followed by the usage text, which lists
    # every option parseArguments() accepts.
    #
    #  (The '&' call form avoids a "called too early to check prototype"
    # warning, as showVersion() is defined further down the file.)
    #
    &showVersion();
    print <<END_OF_USAGE;

Usage: gnump3d-top options

Options:
    --agents           Show the top N user agents.
    --songs            Show the top N songs.
    --dirs             Show the top N directories.
    --hide             Hide the default directories.
    --last             Show the last N songs played.
    --logfiles pattern Set the logfile pattern, for reading multiple logfiles.
    --users            Show the top N users.
    --logins           Show the top N login names.
    --count=N          Set the number of entries to show.
    --config=file      Set the GNUMP3d config file.
    --version          Show the version number.
    --help             Show this help.
    --debug            Show debugging output.

    --plain            Show output as plain text
END_OF_USAGE
}


#
#  Show the version number of this script.
#
sub showVersion()
{
    #
    #  Print a one line banner with the version number of this script.
    #
    #  The number is pulled out of the CVS revision marker held in
    # $VERSION_NUMBER.  Take care never to spell that marker out in
    # full in this routine - CVS would expand it.  If the marker does
    # not have the expected shape it is printed as it stands.
    #
    my ( undef, $number ) =
        ( $VERSION_NUMBER =~ /\$([a-zA-Z:]+) ([0-9\.]+) \$/ );

    my $revision = defined( $number ) ? $number : $VERSION_NUMBER;

    print "gnump3d-top - version $revision - http://www.gnump3d.org/\n";
}


#
#  Normalize a given path, removing duplicated "/" characters and URL-decoding it.
#
sub prettifyEntry($)
{
    #
    #  Tidy up a request path for display.
    #
    #  Arguments: $string - the path as it appears in the logfile.
    #  Returns:   the path with every run of "/" characters collapsed to
    #             a single "/", each "+" turned into a space, and each
    #             %XX escape replaced by the character it stands for.
    #
    my ( $string ) = (@_);

    #  Collapse runs of '/' in a single pass.  This also keeps text which
    # follows an embedded newline, which a "(.*)//(.*)" rewrite loses.
    $string =~ s{/+}{/}g;

    # URL decode.
    $string  =~ tr/+/ /;
    $string  =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;

    return( $string );
}


#
#  Convert a given IP address to a hostname.
#
sub ipToName($)
{
    #
    #  Convert a dotted-quad IPv4 address to a hostname.
    #
    #  Arguments: $ip - the address, e.g. "192.168.0.1".
    #  Returns:   the hostname found by a reverse lookup, or $ip itself
    #             when it is not a valid dotted quad or when the lookup
    #             finds nothing.
    #
    my( $ip ) = ( @_ );

    #  Anything which is not four numbers in the range 0-255 is handed
    # back untouched: it cannot be packed into an IPv4 address, so a
    # lookup could only produce warnings or a wrong answer.
    return( $ip ) unless ( defined( $ip ) );

    my @octets = ( $ip =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})\z/ );
    return( $ip ) unless ( @octets == 4 );
    return( $ip ) if ( grep { $_ > 255 } @octets );

    #  Take the address family from Socket rather than hard-coding its
    # numeric value.
    require Socket;
    my $name = gethostbyaddr( pack( 'C4', @octets ), Socket::AF_INET() );

    return( ( defined( $name ) && length( $name ) ) ? $name : $ip );
}
