#!/usr/bin/perl
###############################################################################
# Josh Yost  joshyost@gmail.com , Steve Rader
# written : 08.15.06
# Updated : 08.18.06
# 
# nagios-summary
# 
#### THE POINT:
# 	This will use the Nagios log file to create a simple cmdline summary 
# of alerts. The sub functions *_hook were adapted from the cnagios.pl file 
# found at : ftp://noc.hep.wisc.edu/pub/src/cnagios/.  My thanks to them!
#
#### STUFF:
# Tested under...
#       - Nagios 1.2, RedHat Linux 7.3 i386, Perl 5.6.1
#	- Nagios-core 2.x i686-gentoo-linux 2006.0, Perl v5.8.8
# 
# Licensed under GPL w/ absolutely no warranty. 
# Please let me know if this helps!
#
# caveats: The functionality of this is obviously anchored to the syntax
#	   of Nagios' logging; if they change it, this breaks.
#
#### VERSION:
# 1.0.10 (contrib. by Steve Rader)
#       - automagically expands to full terminal width
#       - intelligently merges "Hostname" or "Host / Service" and
#         "Plugin Output" 
#       - unknown args now result in usage and exit
#       - Durations have been shortened to XXmYYs or XXhYYm or XXdYYh
#	- pad Date & Time Down/Up dates to two chars 
#         (eg "Wed Aug  2 05:30" instead of "Wed Aug 2 05:30")
#         so Date & Time Down/Up times always line up in columns 
#       - fixes so output columns are always separated by two chars
# 
# 1.0.9 (enough big changes to skip 8)
#	- switched to using a .cf file
#       - added ability to take pipe from STDIN
#       - switched to using format & write instead of printf
#       - user can customize output through the .cf
#	- need to look for time speedups now
#	- changed the print_time fn to allow left & right-justify
# 1.0.7 
#	- changed to a -H,--host and $showhost flag, 
#         so it checks either host or svc alerts - not both (small speed gain)
#	- added a --sort flag to sort on the columns
#	- small logic change in print_time(), in case $day > 9
# 1.0.6 (contrib. by Steve Rader)
#       - output sorted by date & time down
#       - reports only CRITICALs by default, use "-S down" to report host 
#         DOWN alerts
# 1.0.5
#	- changed the date & time output; shortened the output to 98 col
#	- re-worked the output code to use some sub functions
# 1.0.4
#	- implemented $state flag
#	- added some more REs for shrinking output
#	- moved name shrinking to the output loops (small time gain)
#	- added short options, added --nohost option
# 1.0.3
#	- fixed it so it correctly will track multiple down and up service states
#	  in the same log file; it also won't print several CRITICALs in a row 
#	  w/out seeing the corresponding OKs
# 1.0.2
#	- changed it to use REs to shrink the hostname,service, and outputs
#	- moved the host down/up to its own output section
#	- moved the *_hook sub functions into the script
# 1.0.1
#	- added --today and --yesterday flags
#
#### BUGS & PATCHES:
#	mailto: joshyost@gmail.com


use warnings;
use strict;
use File::Basename;
use Getopt::Long;
use Time::Local;
use Term::ReadKey; # FIXME: have ./configure not include this line when Term::ReadKey isn't installed

my $exe  = basename $0;
my $vers = '1.0.10';
Getopt::Long::Configure("bundling");

################################ Functions ################################
# Abort with a one-line synopsis of the command line.
# Never returns: the synopsis is handed to die().
sub usage {
  my $synopsis = "$exe [-h|--help][-f|--file=<str>][options...][-]\n";
  die $synopsis;
}

# Print the full help text and abort.
# The text is passed to die(), so it is written to STDERR and the
# program exits with a non-zero status.  Never returns.
sub HELP{
	my $die = "$exe [-h|--help][-c|--conf=<str>][-f|--file=<str>][options...]\n"
	. "\n\tThis program will parse your nagios log file and create\n"
	. "a summary of either service or host alerts.  By default, it reads\n"
	. "the file '/var/nagios/nagios.log'.  It can also be set to read from\n"
	. "STDIN by passing a '-' at the end of the options list.\n" 
	. "\n\t-h,--help\t show this help\n"
	. "\t-c,--conf=<str>\t point to an alternate configuration file\n"
	# keep in sync with the default assigned to $conf in the option handling
	. "\t\t\t * the default is '/usr/local/etc/nagiosr.cf'\n"
	. "\t--exclude=<str>  ignore hosts matching your regex\n"
	. "\t-f,--file=<str>\t manually target a different log file\n"
	. "\t\t\t * the default is 'nagios.log'\n"
	. "\t-H,--host\t display the host down alert summary\n"
	. "\t-n,--noauto\t don\'t automatically expand to current terminal width\n"
	. "\t-o,--sort=<str>  sort by desired column (default is timedown):\n"
	. "\t\t\t * (r)timedown|(r)timeup|(r)host|(r)plugin|(r)duration\n"
	. "\t\t\t * you can pass a partial string; the 'r' reverses the sort\n"
	. "\t-s,--soft\t capture soft states as well as hard states\n"
	. "\t-S,--state=<str> grab specific state info: critical|warning|both\n"
	. "\t-t,--today\t capture only today's alert info\n"
	. "\t-v,--verbose\t output some extra debugging info\n"
	. "\t-V,--version\t print version information\n"
	. "\t-y,--yesterday\t capture only yesterday's alert info\n";
	die $die;
}

# Abort after reporting the program name and version.
# Never returns: the banner is handed to die().
sub VERS {
  my $banner = "$exe\t\tv. $vers\n";
  die $banner;
}

#### Edit and shrink the service plugin output
# Args:    $_[0] - the plugin output text
#          $_[1] - reference to a scalar holding Perl code that edits $_
# Returns: the edited text, or the text unchanged when no hook code is set
# NOTE: the hook code is executed with string eval, so whatever supplies it
#       must be trusted.
{
  my $warned;   # report a broken hook once, not once per log line

  sub service_plugin_hook {
    local($_) = $_[0]; my $svc_phook = $_[1];
    # no hook configured: nothing to evaluate (eval of undef would warn)
    return $_ unless defined($$svc_phook) && length($$svc_phook);
    eval $$svc_phook;
    if ($@ && !$warned++) { warn "Error in svc_plugin_hook: $@" }
    return $_;
  }
}

#### Edit and shrink the host plugin output
# Args:    $_[0] - the plugin output text
#          $_[1] - reference to a scalar holding Perl code that edits $_
# Returns: the edited text, or the text unchanged when no hook code is set
# NOTE: the hook code is executed with string eval, so whatever supplies it
#       must be trusted.
{
  my $warned;   # report a broken hook once, not once per log line

  sub host_plugin_hook {
    local($_) = $_[0]; my $host_phook = $_[1];
    # no hook configured: nothing to evaluate (eval of undef would warn)
    return $_ unless defined($$host_phook) && length($$host_phook);
    eval $$host_phook;
    if ($@ && !$warned++) { warn "Error in host_plugin_hook: $@" }
    return $_;
  }
}

#### Edit and shrink host and service names
# Args:    $_[0] - the host or service name
#          $_[1] - reference to a scalar holding Perl code that edits $_
# Returns: the edited name, or the name unchanged when no hook code is set
# NOTE: the hook code is executed with string eval, so whatever supplies it
#       must be trusted.
{
  my $warned;   # report a broken hook once, not once per name

  sub name_hook {
    local ($_) = $_[0]; my $name_hook = $_[1];
    # no hook configured: nothing to evaluate (eval of undef would warn)
    return $_ unless defined($$name_hook) && length($$name_hook);
    eval $$name_hook;
    if ($@ && !$warned++) { warn "Error in name_hook: $@" }
    return $_;
  }
}

#### Calculate duration
# Args:    $diff - elapsed time in seconds
# Returns: a short string using the two most significant units:
#          "MmSSs" below one hour, "HhMMm" below one day, "DdHHh" otherwise.
#          The second unit is always zero-padded to two digits.
sub print_time{
  my ($diff) = @_;

  my $sec = $diff % 60;
  my $min = int ($diff / 60);
  return sprintf('%dm%02ds', $min, $sec) if $min < 60;

  my $hr = int($min / 60);
  $min %= 60;
  # exactly 24 hours is one full day, so the day form starts at 24, not 25
  return sprintf('%dh%02dm', $hr, $min) if $hr < 24;

  return sprintf('%dd%02dh', int($hr / 24), $hr % 24);
}

#### Merge description and plugin output
# Builds one column from a left-hand text (description) and a right-hand
# text (plugin output).
# Args:    $lhs, $rhs - the two texts
#          $width     - the column width to fit into
# Returns:
#   - $lhs cut to $width characters when $lhs alone fills the column
#   - "$lhs..." followed by the tail of $rhs when both do not fit
#     (or just $lhs when there is no room left for the "..." marker)
#   - otherwise $lhs and $rhs pushed apart with spaces
#   The last two forms are $width - 1 characters long.
sub merge {
  my ($lhs,$rhs,$width) = @_;

  # trim LHS?
  my $lhs_len = length($lhs);
  return(substr($lhs,0,$width)) if $lhs_len >= $width;

  # trim RHS?
  my $rhs_len = length($rhs);
  if ( $rhs_len >= ($width - $lhs_len - 1) ) {
    # room left for RHS text after "$lhs..." (3 chars of dots)
    my $rhs_width = $width - ($lhs_len + 3) - 1;
    # Not even the dots fit: a negative width would make substr() read
    # outside the string and the result overflow the column.
    return($lhs) if $rhs_width < 0;
    return($lhs . '...' . substr($rhs,$rhs_len-$rhs_width,$rhs_width));
  }

  # concat and pad...
  my $pad = $width - $lhs_len - $rhs_len - 1;
  return($lhs . ' ' x $pad . $rhs);
}

############################# Variables #####################################
my ($conf,$file,$help,$showhost,$path,$state,$soft,$sort,
    $today,$verbose,$version,$warnings,$yesterday,$exclude,$no_aw);
my (%host_down,%host_up,%svc_down,%svc_up);
my (%host_output,%svc_output);

# 1.0.9 variables
my  ($name_hook,$svc_phook,$host_phook,$k,@fields);
my  (@host_header,@svc_header,$format,@fmt_len);
# package variables: filled in per row before each write()
our ($timedown,$timeup,$descrip,$plugout,$duration);
# 1.0.10 variables
my  ($descr_and_plugout,$descr_and_plugout_len,$format_spec);
my  ($do_aw,$cols,$non_auto_width);

my @mon = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec');
my @days = ('Sun','Mon','Tue','Wed','Thu','Fri','Sat','Sun');

# usage(), HELP() and VERS() all die, so nothing after them runs
usage() if ! GetOptions (
           'soft|s'   => \$soft,  'verbose|v' => \$verbose, 'yesterday|y' => \$yesterday,
           'today|t'  => \$today, 'sort|o=s' => \$sort,     'state|S=s'   => \$state,
           'help|h'   => \$help,  'version|V' => \$version, 'file|f=s'    => \$file,
           'conf|c=s' => \$conf,  'host|H'  => \$showhost,  'exclude=s'   => \$exclude,
           'noauto|n' => \$no_aw);

# Answer --help / --version before validating anything else, so they work
# even when combined with a bad --sort or --state value.
HELP() if $help;
VERS() if $version;

# The only non-option argument accepted is a single '-' (read STDIN)
usage() if (defined($ARGV[0]) && $ARGV[0] ne '-') || @ARGV > 1;

$do_aw = $no_aw ? 0 : 1;

# path may get changed after reading .cf
$path = '/var/nagios';
$conf = '/usr/local/etc/nagiosr.cf' if !defined $conf;
# only the file name is kept; the directory always comes from $path
$file = defined($file) ? basename($file) : 'nagios.log';

# Translate --state into the alternation later matched against alert states
{
  my %state_pattern = (
    critical => 'CRITICAL',
    warning  => 'WARNING',
    both     => 'CRITICAL|WARNING',
  );
  my $wanted = defined($state) ? lc $state : 'critical';
  if ( !exists $state_pattern{$wanted} ) {
    print "Error: \"$state\" is not a known state.  Use \"critical\", \"warning\" or \"both\"\n";
    exit 1;
  }
  $state = $state_pattern{$wanted};
}

# Process --sort; allow partial strings
# The first column name that starts with the given text wins.  The text is
# compared literally (not as a regex), so metacharacters cannot match by
# accident or abort the program with a pattern error.
if (defined($sort)){
  my ($column) = grep { index($_, $sort) == 0 }
    qw(timedown timeup host plugin duration
       rtimedown rtimeup rhost rduration rplugin);
  if ( !defined $column ) {
    die "Error: sort must be a string (partially) matching:\n"
        . "(r)timedown, (r)timeup, (r)host, (r)plugin, or (r)duration.\n"
  }
  $sort = $column;
}

########################### Process .cf file ################################
# Read the configuration file.  Recognised keys: path, svc_header,
# host_header, format, name_hook, svc_plugin_hook and host_plugin_hook.
open my $cf_fh, '<', $conf or die "Error finding $conf: $!\n";

# A hook value may continue over the following lines, up to a line holding
# only '.'.  Blank lines and lines starting with '#' are skipped; the rest
# are appended to the code as-is (no separator is inserted).
my $read_hook_body = sub {
  my ($code) = @_;
  $code = '' if !defined $code;
  # test for EOF with defined() so a last line without a newline is kept
  while (defined(my $line = <$cf_fh>)) {
    chomp $line;
    next if ($line =~ /^\s*$/ || $line =~ /^#/);
    last if ($line eq '.');
    $code .= $line;
  }
  return $code;
};

CF:
while (<$cf_fh>){
  chomp;
  # skip comments and blank lines
  next if (/^\s*$/ || /^#/);

  my ($var, $exp) = split /\s*=\s*/,$_,2;
  # Strip the '/' on path, if it exists
  if    ($var eq 'path')        { $path = $exp; $path =~ s/\/$//; }
  elsif ($var eq 'svc_header')  { @svc_header  = split /,/,$exp; }
  elsif ($var eq 'host_header') { @host_header = split /,/,$exp; }
  # the format line is only stored here; it is processed later, once the
  # terminal width is known
  elsif ($var eq 'format')           { $format_spec = $exp; }
  elsif ($var eq 'name_hook')        { $name_hook  = $read_hook_body->($exp); }
  elsif ($var eq 'svc_plugin_hook')  { $svc_phook  = $read_hook_body->($exp); }
  elsif ($var eq 'host_plugin_hook') { $host_phook = $read_hook_body->($exp); }
  else{ warn "Unrecognized variable: $var\n" }
}
close $cf_fh;

if ( $do_aw ) { 
  # Get number of columns in this terminal.  When the size cannot be
  # determined (e.g. output is not a terminal), switch auto-width off and
  # use the widths from the format spec instead of building a broken format.
  ($cols) = eval {
    local $SIG{__WARN__} = sub {};   # the failure is handled below
    (GetTerminalSize())[0];
  };
  if ( !defined($cols) || $cols !~ /^\d+$/ || $cols < 1 ) { $do_aw = 0; }
}

# Process format spec
# The spec is "<picture fields>,<variables>", e.g. a picture field '@<22'.
# The result in $format is the source text of a format declaration; it is
# left undefined when the spec is unusable, so the default format gets used.
if ( defined($format_spec) ) {
  FORMAT: {
    $format = "format = \n";
    my ($picture_spec,$var_spec) = split /,/,$format_spec,2;
    $picture_spec = '' if !defined $picture_spec;
    $var_spec     = '' if !defined $var_spec;
    @fields = split /,/,$var_spec;
    (s/^\$//) for (@fields);
    my @specs = split /\s+/,$picture_spec;

    if ( $do_aw ) { 
      # Figure out timedown, timeup and duration width in autoformat mode...
      # (fields 0, 1 and 3; field 2 takes whatever is left of the terminal)
      $non_auto_width = 2;
      for my $i (0,1,3) {
        $non_auto_width += $1 if defined($specs[$i]) && $specs[$i] =~ /(\d+)$/;
      }
    }

    # Process the first part of the format line
    # Start cnt one off, since it is used as (number of fields - 1) below
    my $cnt = -1;
    for my $val (@specs){
      $cnt++;
      if ($val !~ /^(@|\^)(<|\||>)(\d+)$/){
        warn "Error: format string is incorrect.  It should be something like: '@<22', not '$val'\n"
           . "Using default format...\n";
        $format = undef; last FORMAT;
      }
      my ($chara,$charb,$num) = ($1,$2,$3);
      if ( $cnt == 2 ) {
        if ( $do_aw ) {
          # keep the configured width if the terminal is too narrow
          my $auto = $cols - $non_auto_width;
          $num = $auto if $auto > 2;
        }
        $descr_and_plugout_len = $num;
      }
      $format .= $chara . $charb x ($num - 2);
      # save number for dashes
      push @fmt_len,$num;
      $format .= '  ';
    }

    # check the number of variables
    if ($cnt < 0 || $var_spec !~ /^\$\w+(,\$\w+){$cnt}/){
      $cnt++;
      warn "Error: number of format variables is incorrect, expecting $cnt.\n"
         . "Using default format...\n";
      $format = undef; last FORMAT;
    }
    $format .= "\n" . $var_spec . "\n.\n";
    if ( $verbose ) { print "format is...\n$format\n"; }
  }
}
#$descr_and_plugout_len = $fmt_len[2];

########################### Process Log File ################################
if ( !defined($ARGV[0]) ) {  # ...if it's defined, it's verified as '-' for real STDIN above
  open STDIN, '<', "$path/$file" or die "Error finding log \"$path/$file\": $!\n" ;
}

# Everything below is the same for every log line, so compute it once.
my @sys_time   = localtime time;
my $midnight   = timelocal(0,0,0,$sys_time[3],$sys_time[4],$sys_time[5]);
my $state_re   = qr/^(?:$state)$/;
my $exclude_re = (defined($exclude) && length($exclude)) ? qr/$exclude/ : undef;

# "Wed Aug  2 05:30" - day of month space-padded, hour and minute zero-padded
my $stamp_for = sub {
  my @t = localtime $_[0];
  return sprintf('%s %s %2d %02d:%02d', $days[$t[6]], $mon[$t[4]], $t[3], $t[2], $t[1]);
};
# number of values stored for a key (3 per recorded event), 0 if none
my $count_of = sub { my ($list) = @_; return defined($list) ? scalar(@$list) : 0; };

# The *_down / *_up hashes collect, per key, a flat list of
# (formatted time, epoch, plugin output) triples.  An "up" is only recorded
# while an outage is open, so both lists always pair up one to one.
while (<>){
  my ($epoch) = /^\s*\[(\d+)\]/ or die "Error: $path/$file is not in the proper format.\n";

  # next if epoch is older than 12:00:00am today
  if ($today){ next if ($epoch < $midnight) }
  # next if epoch is older than 12:00:00am yesterday or newer than 11:59:59pm yesterday
  elsif ($yesterday){next if ($epoch < $midnight-86400 || $epoch > $midnight-1) }

  if (/SERVICE ALERT/ && !$showhost){
    # host;service;state;SOFT|HARD;not. #;msg
    my ($host,$svc,$svc_state,$type,$not_num,$msg) =
      /^\[\d+\]\s+SERVICE ALERT:\s+(.*?);(.*?);(.*?);(.*?);(.*?);(.*)$/
      or next;
    # test for exclusions
    next if (defined($exclude_re) && $host =~ $exclude_re);
    my $key = "$host^$svc";
    next unless ($svc_state =~ $state_re) || ($svc_state eq 'OK' && defined($svc_down{$key}));
    next unless ($soft && $type eq 'SOFT') || $type eq 'HARD';
    print if $verbose;

    my $outage_open = $count_of->($svc_down{$key}) > $count_of->($svc_up{$key});
    if ($svc_state ne 'OK'){
      # ignore repetitive alerts (several CRITICALs before an OK)
      next if $outage_open;
      push @{$svc_down{$key}}, ($stamp_for->($epoch), $epoch, service_plugin_hook($msg,\$svc_phook));
    }
    else{
      # An OK with no open outage (e.g. recovery from a state that is not
      # being tracked) must not be stored: a surplus entry would leave the
      # lists unequal and every later alert for this key would be skipped.
      next unless $outage_open;
      push @{$svc_up{$key}}, ($stamp_for->($epoch), $epoch, service_plugin_hook($msg,\$svc_phook));
    }
  }
  elsif (/HOST ALERT/ && $showhost){
    # host;state;SOFT|HARD;not. #;msg
    my ($host,$h_state,$type,$not_num,$msg) =
      /^\[\d+\]\s+HOST ALERT:\s+(.*?);(.*?);(.*?);(.*?);(.*)$/
      or next;
    # test for exclusions
    next if (defined($exclude_re) && $host =~ $exclude_re);
    next unless ($h_state eq 'DOWN') || ($h_state eq 'UP' && defined($host_down{$host}));
    next unless ($soft && $type eq 'SOFT') || $type eq 'HARD';
    print if $verbose;

    my $outage_open = $count_of->($host_down{$host}) > $count_of->($host_up{$host});
    if ($h_state ne 'UP'){
      # ignore repetitive alerts (several DOWNs before an UP)
      next if $outage_open;
      push @{$host_down{$host}}, ($stamp_for->($epoch), $epoch, host_plugin_hook($msg,\$host_phook));
    }
    else{
      # only an UP that closes an open outage is stored (see above)
      next unless $outage_open;
      push @{$host_up{$host}}, ($stamp_for->($epoch), $epoch, host_plugin_hook($msg,\$host_phook));
    }
  }
}

# With --verbose, dump everything collected from the log: one line per key,
# service downs first, then service ups, host downs and host ups.
if ($verbose){
  for my $collected (\%svc_down, \%svc_up, \%host_down, \%host_up){
    for my $name (sort keys %$collected){
      print "$name: @{$collected->{$name}}\n";
    }
  }
}

############################## Create Output ################################
# Create service alert output
# Turn the collected down/up events into output rows.  Rows are stored in
# %svc_output / %host_output under their sort key as a flat list of
# (time down, time up, name, plugin output, duration), 5 values per row.
if (!defined $showhost){
 for my $key (sort keys %svc_down){
   # Shrink host & service name
   my ($h,$s) = split /\^/,$key,2;
   my $name = name_hook($h, \$name_hook) . ' ' . name_hook($s, \$name_hook);
   while (@{$svc_down{$key}}){
     # take the next (time, epoch, message) triple off each list
     my ($time_down,$epoch_down,$down_msg) = splice @{$svc_down{$key}}, 0, 3;
     # Declared unconditionally: "my ... if COND" has undefined behaviour and
     # can carry values over from an earlier iteration.
     my ($time_up,$epoch_up);
     if (defined($svc_up{$key}) && @{$svc_up{$key}} > 2){
       ($time_up,$epoch_up) = splice @{$svc_up{$key}}, 0, 3;
     }

     # Fix output
     $time_up  = '*Still Critical*' if !defined($time_up);  # FIXME: sometimes *Still Warning*
     $epoch_up = time if !defined($epoch_up);
     my $diff  = $epoch_up - $epoch_down;

     # Test for sorting, push data onto hash
     if (defined($sort)){
       if ($sort =~ /timedown/)   { $k = $epoch_down }
       elsif ($sort =~ /timeup/)  { $k = $epoch_up   }
       elsif ($sort =~ /host/)    { $k = $key        }
       elsif ($sort =~ /plugin/)  { $k = $down_msg   }
       elsif ($sort =~ /duration/){ $k = $diff       }
       else { die "Invalid sort parameter - Impossible!\n" }
     }
     else { $k = $epoch_down }
     push @{$svc_output{$k}}, ($time_down,$time_up,$name,$down_msg,print_time($diff));
   }
 }
}
# Create host alert output
else{
 for my $key (sort keys %host_down){
   # Shrink host name
   my $h = name_hook($key, \$name_hook);
   while (@{$host_down{$key}}){
     # take the next (time, epoch, message) triple off each list
     my ($time_down,$epoch_down,$down_msg) = splice @{$host_down{$key}}, 0, 3;
     # declared unconditionally, see the service loop above
     my ($time_up,$epoch_up);
     if (defined($host_up{$key}) && @{$host_up{$key}} > 2){
       ($time_up,$epoch_up) = splice @{$host_up{$key}}, 0, 3;
     }

     # Fix output
     $time_up  = ' * Still Down * ' if !defined($time_up);
     $epoch_up = time if !defined($epoch_up);
     my $diff  = $epoch_up - $epoch_down;

     # Test for sorting
     if (defined($sort)){
       if ($sort =~ /timedown/)   { $k = $epoch_down }
       elsif ($sort =~ /timeup/)  { $k = $epoch_up   }
       elsif ($sort =~ /host/)    { $k = $h          } 
       elsif ($sort =~ /plugin/)  { $k = $down_msg   }
       elsif ($sort =~ /duration/){ $k = $diff       }
       else { die "Invalid sort parameter - Impossible!\n" }
     }
     else { $k = $epoch_down }
     push @{$host_output{$k}}, ($time_down,$time_up,$h,$down_msg,print_time($diff));
   }
 }
} 

############################## Output Output ################################
# Create sort string - if it begins w/ a 'r', reverse the sort
# Comparison routine for sort(): compares the output keys in $a and $b.
# Time and duration keys (and the default, when no --sort was given) are
# compared as numbers, host and plugin keys as strings; a sort name starting
# with 'r' reverses the order.
sub my_sort{
  my $descending = defined($sort) && $sort =~ /^r/;
  my $numeric    = !defined($sort)
                || $sort =~ /duration/ || $sort =~ /timedown/ || $sort =~ /timeup/;
  my ($first,$second) = $descending ? ($b,$a) : ($a,$b);
  $numeric ? $first <=> $second : $first cmp $second;
}

# Check format
# Install the format built from the .cf file, if there is one.  The default
# format declared below is compiled with the script; a successful eval
# replaces it at run time.
if (defined $format) {
  no warnings 'all';   # silences the "redefined" warning from the eval
  eval $format;
  if ($@) {
    warn "Error: format from $conf could not be compiled: $@"
       . "Using default format...\n";
    $format = undef;
  }
}
if (!defined $format) {
  # column widths of the default format
  @fmt_len = (17,17,45,9);
  # merge() needs the width of the merged description/plugin column;
  # without it that column is printed empty
  $descr_and_plugout_len = $fmt_len[2];
}
format = 
@<<<<<<<<<<<<<<<  @<<<<<<<<<<<<<<<  @|||||||||||||||||||||||||||||||||||||||||||  @>>>>>>>
$timedown,$timeup,$descr_and_plugout,$duration
.

# Print one report (header line, dashes, one line per alert) through the
# current output format.
# Args: $rows_for   - hash ref: sort key => flat list of rows, 5 values each
#                     (time down, time up, name, plugin output, duration)
#       $header     - array ref with the 5 column titles from the .cf file;
#                     the built-in titles are used when it is empty
#       $name_title - built-in title of the name column
sub _print_report {
  my ($rows_for,$header,$name_title) = @_;

  # Test & print header
  my @titles = @$header
    ? @$header
    : ('Date & Time Down','Date & Time Up',$name_title,'Plugin output','Duration');
  ($timedown,$timeup) = @titles[0,1];
  $descr_and_plugout  = merge($titles[2], $titles[3], $descr_and_plugout_len);
  $duration           = $titles[4];
  write;

  # Print dashes, one run per column
  $timedown          = '-' x ($fmt_len[0] - 1);
  $timeup            = '-' x ($fmt_len[1] - 1);
  $descr_and_plugout = '-' x $descr_and_plugout_len;
  $duration          = '-' x (defined($fmt_len[3]) ? $fmt_len[3] - 1 : $descr_and_plugout_len);
  write;

  # Print alert details
  for my $stamp (sort my_sort keys %$rows_for){
    my @cells = @{$rows_for->{$stamp}};
    while (@cells){
      ($timedown,$timeup,$descrip,$plugout,$duration) = splice @cells, 0, 5;
      $descr_and_plugout = merge($descrip,$plugout,$descr_and_plugout_len);
      write;
    }
  }
}

# Display host info
if (defined($showhost)){ 
  if (%host_output) { 
    _print_report(\%host_output, \@host_header, 'Hostname');
  }
  else { 
    print "No host problems found in log for given time period.\n" if $verbose 
  }
  exit 0;
}

# Display svc info
if (%svc_output) {
  _print_report(\%svc_output, \@svc_header, 'Host / Service');
}
else { 
  print "No svc problems found in log for given time period.\n" if $verbose 
}

exit 0;
