#!/usr/bin/perl -w
# This program is designed for mirroring sDesk to sdesk.europa.com

use strict;

# -- configuration ------------------------------------------------------------
my $temp_dir      = '/tmp';
my $prg_name      = 'sdesk_mirror_robot';
my $source        = 'http://192.168.0.3/pp';
my $maximum_tries = 10;
my $read_timeout  = 60 * 2;
# author's estimate of the worst case per file:
#   10 tries * (2 min timeout + 2 min wait) = 40 min
my $wait_timeout  = 60 * 2;

# wget options used by the grabber command line:
#   --tries ..... number of retries
#   --continue .. continue interrupted transfers
#   --timeout ... set the read timeout to SECONDS
#   --wait ...... pause in SECONDS (meant here as the wait between retries)
#   --level ..... maximum recursion depth
my $grabber = join(' ',
  'wget',
  "--tries=$maximum_tries",
  '--continue',
  "--timeout=$read_timeout",
  "--wait=$wait_timeout",
  '--level=1',
);
# prefix of the option naming the local file; the path is appended directly
my $grabber_out    = '--output-document=';

# remote paths are relative to $source
my $status_file    = 'download/sdesk.md5sum';
my @html_files     = qw(sdesk_en.html sdesk_cz.html);

# local paths
my $working_dir    = "$temp_dir/$prg_name";
my $local_md5_file = "$temp_dir/$prg_name/saved.sdesk.md5sum";
my $site_root      = "$temp_dir/sdesk_site";

# -- Global variables ---------------------------------------------------------
# text of every message printed through msgprint, in order
my $msg = '';

# -----------------------------------------------------------------------------
# -- Print a message and append it to the global $msg log
# -- Input:
# --   _msg: string, err_out: bool (true => STDERR, false => STDOUT)
sub msgprint{
  my ($text, $to_stderr) = @_;

  # the message is recorded no matter which stream it is written to
  $msg .= $text;

  my $stream = $to_stderr ? \*STDERR : \*STDOUT;
  print {$stream} $text;
};


# -----------------------------------------------------------------------------
# -- Try to download specified file
# -- Input:
# --   filename: string, path relative to $source
# -- Output:
# --   on success: (1, savedas: string, grabber_output: string)
# --   on failure: (-1)   NOTE: -1 is a true value, test the result with "< 1"
sub download_file{
  my ($filename) = @_;

  # the remote path is flattened into one file name inside the working dir
  (my $flat_name = $filename) =~ tr{/}{_};
  my $saveas = "$working_dir/$flat_name";

  # NOTE: $filename is interpolated into a shell command line; it must not
  # contain shell metacharacters.
  my $cmd = "$grabber $grabber_out$saveas $source/$filename";

  # wget writes its log to stderr, so stderr is merged into stdout here;
  # without that the backticks capture nothing and the report below is empty
  my $grabber_output = `$cmd 2>&1`;
  my $wait_status    = $?;
  $grabber_output    = '' if !defined($grabber_output);

  if ($wait_status != 0) {
    # $? is the raw wait status; the exit code of the command is its high byte
    my $exit_code;
    if ($wait_status > 0 and ($wait_status & 127) == 0) {
      $exit_code = $wait_status >> 8;
    } else {
      # command could not be started or was killed by a signal
      $exit_code = "unknown (wait status $wait_status)";
    };
    msgprint("download_file: wget return error code $exit_code\n".
             "download_file: cmd = $cmd\n".
             "download_file: ---WGET-OUTPUT-MESSAGES---BEGIN---\n".
             "$grabber_output".
             "download_file: ---WGET-OUTPUT-MESSAGES---END---\n", 1);
    return (-1);
  };

  return (1, $saveas, $grabber_output);
};

# -----------------------------------------------------------------------------
# -- Load md5sum file, parse it and return a hash: filename => md5 hash
# -- Input:
# --   filename
# -- Output:
# --   on success: (1, md5sum: reference to hash filename => md5 hash)
# --   on failure: (-1)   (file could not be opened; message goes to STDERR)
sub read_md5sum{
  my ($filename) = @_;

  # open md5file; three-argument open with a lexical handle, so the file name
  # can never be taken as an open mode and no global handle is shared
  my $md5_fh;
  if (!open($md5_fh, '<', $filename)) {
    msgprint("read_md5sum: Can't open \"$filename\": $!\n", 1);
    return (-1);
  };

  # each useful line is "<hex digest><spaces and/or '*'><file name>";
  # lines that do not match are skipped.  A lexical line variable keeps the
  # caller's $_ untouched.
  my %md5sum;
  while (my $line = <$md5_fh>) {
    if ($line =~ /([0-9a-fA-F]+)[\s\*]+([\w\_\.\/]+)/) {
      $md5sum{$2} = $1;
    };
  };

  close($md5_fh);

  return (1, \%md5sum);
};

# -----------------------------------------------------------------------------
# -- Download the remote md5sum file ($status_file) and parse it
# -- Input:
# -- Output:
# --   download failed: (-1)
# --   otherwise:       (res, md5sum, filename)
# --     res, md5sum ... exactly what read_md5sum returned for the saved copy
# --     filename ...... local path of the saved copy
sub get_status_file{

  # fetch the file describing the current state of the remote site
  my ($fetched, $saved_as) = download_file($status_file);
  unless ($fetched >= 1) {
    msgprint("get_status_file: download_file failed\n", 1);
    return (-1);
  };

  # parse it; the parser's own status is handed on to the caller
  my ($parsed, $md5sum) = read_md5sum($saved_as);

  return ($parsed, $md5sum, $saved_as);
};

# -----------------------------------------------------------------------------
# -- Read downloaded file and write it with changed links for mirrored site;
# -- the downloaded file is removed afterwards
# -- Input:
# --   Downloaded: String, Mirrored: String
# -- Output:
# --   res: (1) on success, (-1) if a file can't be opened or written
sub change_links_in_html_pages{
  my ($downloaded, $mirrored) = @_;

  # open files (lexical handles are closed automatically on every return)
  my ($in_fh, $out_fh);
  if (!open($in_fh, '<', $downloaded)) {
    msgprint("change_links_...: Can't open \"$downloaded\": $!\n", 1);
    return (-1);
  };

  if (!open($out_fh, '>', $mirrored)) {
    msgprint("change_links_...: Can't open \"$mirrored\": $!\n", 1);
    return (-1);
  };

  # read whole input file
  my $in = do { local $/; <$in_fh> };
  $in = '' if !defined($in);          # empty input file
  close($in_fh);

  # <A HREF=...> whose target contains no ':' (i.e. no scheme)
  my $source_href = qr{<\s*A\s+HREF\s*=[\s"]*([^:]+?)[\s"]*>}si;
  # <A HREF=...> pointing below $source at a path containing "sdesk".
  # Built with qr{} (a double-quoted string would turn \s, \w into plain
  # letters and the pattern would never match); $source is matched literally.
  my $href2 = qr{<\s*A\s+HREF\s*=[\s"]*\Q$source\E/([\w/_.]*sdesk.*?)\s*>}si;

  # link all to home site
  $in =~ s{$source_href}{<A HREF="$source/$1">}g;
  # link only sdesk stuff to mirror ($1 still carries the closing quote)
  $in =~ s{$href2}{<A HREF="$1>}g;
  # switch mirror to main site
  $in =~ s/<!--MAINSITE BEGIN_TAG-->.*?<!--MAINSITE END_TAG-->//sig;
  $in =~ s/<!--MIRRORSITE BEGIN_TAG(.*?)MIRRORSITE END_TAG-->/$1/sig;
  $in =~ s{<A HREF=http://sDesk\.europa\.com/}{<A HREF=$source/}sig;

  # write "fixed" file; buffered write errors only show up at close
  my $written = print {$out_fh} $in;
  my $closed  = close($out_fh);
  if (!($written and $closed)) {
    msgprint("change_links_...: Can't write \"$mirrored\": $!\n", 1);
    return (-1);
  };

  # the downloaded original is no longer needed
  unlink($downloaded);
  return (1);
};

# -- MAIN ---------------------------------------------------------------------
# -- One mirroring pass: fetch the remote md5sum list, compare it with the
# -- list saved by the previous pass, download every new or changed file plus
# -- the html pages, put them under $site_root and save the new list.
# -- Input:
# -- Output:
# --   res: (-1) error, (0) nothing to download, (1) files mirrored
sub main{
  # make working dir; an already existing directory is fine
  if (!(-d $working_dir or mkdir($working_dir, 0777))) {
    msgprint("main: Can't make working dir: $!\n", 1);
    return (-1);
  };

  # load remote md5file
  msgprint("downloading remote md5file => ", 0);
  my ($res, $md5sum_remote, $remote_md5_file) = get_status_file();
  if ($res<1) {
    msgprint("\n",0);
    msgprint("main: get_status_file failed\n", 1);
    return (-1);
  };
  msgprint("$remote_md5_file\n", 0);

  # load local md5file, error is ignored because if reading fails,
  # we will suppose that no files are locally available => mirror all
  my ($md5sum_local);
  ($res, $md5sum_local) = read_md5sum($local_md5_file);
  $md5sum_local ||= {};

  # check changed files - add them to download list
  my @download_list;
  while (my ($filename, $file_md5) = each(%$md5sum_remote)) {
    my $local_md5 = $md5sum_local->{$filename};
    next if defined($local_md5) and $local_md5 eq $file_md5;
    push @download_list, $filename;
  };

  # nothing changed - drop the temporary copy of the remote list and stop
  if (!@download_list) {
    msgprint("main: all files are update or no file available - exiting\n", 1);
    unlink($remote_md5_file);
    return (0);
  };

  # something changed - download html files too
  push @download_list, @html_files;

  # download changed files
  my %downloaded_list;
  foreach my $filename (@download_list) {
    msgprint("downloading $filename => ", 0);
    my ($got, $downloaded_file) = download_file($filename);
    # download_file reports failure as -1, which is a TRUE value, so the
    # result has to be compared and not just negated
    if ($got < 1) {
      msgprint("\n", 0);
      msgprint("main: download of file \"$filename\" failed\n", 1);
      return (-1);
    };
    msgprint("$downloaded_file\n", 0);
    $downloaded_list{$downloaded_file} = $filename;
  };

  # all files were downloaded correctly -> put them to right place
  # create folders
  if (system('mkdir', '-p', "$site_root/download/stable") != 0) {
    msgprint("main: Can't make \"$site_root/download/stable\"\n", 1);
    return (-1);
  };

  # every file is tried even if an earlier one failed; failures are remembered
  my $placed_ok = 1;
  while (my ($downloaded_file, $filename) = each(%downloaded_list)) {
    # move new file to actual file
    if ($filename =~ /\.html/) {
      msgprint("parsing $downloaded_file => $site_root/$filename\n", 0);
      my ($fixed) =
        change_links_in_html_pages($downloaded_file, "$site_root/$filename");
      if ($fixed < 1) {
        msgprint("main: parsing of \"$downloaded_file\" failed\n", 1);
        $placed_ok = 0;
      };
    } else {
      msgprint("moving $downloaded_file => $site_root/$filename\n", 0);
      if (system('mv', '-f', $downloaded_file, "$site_root/$filename") != 0) {
        msgprint("main: moving of \"$downloaded_file\" failed\n", 1);
        $placed_ok = 0;
      };
    };
  };

  # the list is saved only when everything is in place; otherwise the next
  # pass would believe the missing files are up to date and never retry them
  if (!$placed_ok) {
    return (-1);
  };

  # save status file
  if (system('mv', '-f', $remote_md5_file, $local_md5_file) != 0) {
    msgprint("main: Can't save status file \"$local_md5_file\"\n", 1);
    return (-1);
  };
  return (1);
};

# run one mirroring pass and print a one-line summary of its outcome
my ($res) = main();

my $summary = $res <  0 ? "mail: error\n"           # some error occurred
            : $res == 0 ? "mail: no new files\n"    # no new files
            :             "mail: downloaded ok\n";  # ok
print $summary;
