use LWP::Simple;
use Time::Local;
use Date::Calc qw(Decode_Date_US Now Today);
use Win32::ODBC;
use Date::Manip qw(ParseDate DateCalc UnixDate);

# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

# Subroutines

sub WriteToMDB {

   my $DateTimeStamp;
   my $LiteralDateTimeStamp;

   #($Year, $Month, $Day) = Decode_Date_US($TheDate);
   #print "$Day, $Month, $Year \n";   

   ($Waconia_Hour, $Waconia_Min, $Waconia_Sec) = Now();
   ($Waconia_Year, $Waconia_Month, $Waconia_Day) = Today();

   # The following scaler, PerlTimeTxt is useful for having a single field to
   # characterize time. Note this function expects months to start at zero and
   # years to be relative to 1900.

   $PerlTimeTxt = timelocal(0,($Waconia_Min),($Waconia_Hour),$Waconia_Day,($Waconia_Month-1),($Waconia_Year));

   # Create the sql statement for inserting a record of data into the mdb file.

   # If there is a funny 24 in the time stamp, convert it to a more normal 0 to 23 convention...
   # (apparently this never gets used...)
   if ($TheHour == 24) {
      $TheDate = UnixDate( Date::Manip::DateCalc( $TheDate, "+ 1 days" ), "%m/%d/%Y"); 
      $TheHour = 0;
   } 
   
   # If there's no data (i.e. N/A), set it to Null (else surround the value in single quotes)
   if ($WindDir eq "N/A") {$WindDir = "Null";} else {$WindDir = "'$WindDir'";}
   if ($WindAvg eq "N/A") {$WindAvg = "Null";} else {$WindAvg = "'$WindAvg'";}
   if ($WindMax eq "N/A") {$WindMax = "Null";} else {$WindMax = "'$WindMax'";}
   if ($TempAvg eq "N/A") {$TempAvg = "Null";} else {$TempAvg = "'$TempAvg'";}
   if ($DewPoint eq "N/A") {$DewPoint = "Null";} else {$DewPoint = "'$DewPoint'";}
   if ($BPAvg   eq "N/A") {$BPAvg   = "Null";} else {$BPAvg   = "'$BPAvg'";}

   $DateTimeStamp = $TheDate . " " . $TheHour . ":" . $TheMin;
   $LiteralDateTimeStamp = $ParsedDate;
   
   $sql = "INSERT INTO FifteenMinData (PerlTime, DateTimeStamp, LiteralDateTimeStamp, TimeMDY, TimeHr, TimeMin, StationNumber, StationName, ";
   $sql .= "WindDirection, WindSpeed, WindGust, ";
   $sql .= "TempAvg, DewPoint, Pressure) ";
   $sql .= "VALUES ('$PerlTimeTxt','$DateTimeStamp','$LiteralDateTimeStamp','$TheDate','$TheHour','$TheMin','$StationNumber','$TheCity',";
   $sql .= "$WindDir,$WindAvg,$WindMax,";
   $sql .= "$TempAvg, $DewPoint, $BPAvg)";

   # print "SQL = $sql\n";

   # The following lines are handy for testing the sql (i.e. adding one
   # variable at a time until the error...).

   #$sql = "INSERT INTO FifteenMinDataTesting (PerlTime, TimeMDY, TimeHr, TimeMin, StationNumber, StationName, WindDirection, Windspeed, WindGust) ";
   #$sql .= "VALUES ('$PerlTimeTxt','$TheDate','$TheHour','$TheMin','$StationNumber', '$TheCity', '$WindDir', '$WindAvg', '$WindMax')";

   # If all values are reasonable, write the record...
   
   if ((($TempAvg >= -20) && ($TempAvg <=  120)) && 
       (($WindAvg >=   0) && ($WindAvg <=  110)) &&
       (($WindMax >=   0) && ($WindMax <=  130)) &&
       ((($BPAvg >= 28.50) && ($BPAvg <= 31.00)) || ($BPAvg == 0)) &&
       (($TheHour >= 0) && ($TheHour <= 24)) &&
       (($TheMin >= 0) && ($TheMin <= 59)) ) {

      #This clause (from above if) may be causing unnecessary blockage of data...
      #($TheHour <= $Waconia_Hour) &&

      # For now, clean out the database first before writing so don't have to
      # do manual deletes during testing. WARNING, this is dangerous code.
      # Never point this one at the main database.

      #$cnTelem->Sql("DELETE * FROM FifteenMinDataTesting");

      # Print to the screen what you are trying to write to the database. 
      
      print "$TheDate, $TheHour, $TheMin, $StationNumber, $TheCity, $TempAvg, $DewPoint, $WindStuff, $WindDir, $WindAvg, $WindMax, $BPAvg\n";

      # Execute the sql

      # First write to the database on Waconia

      # I have added error checking here.... Note that the Sql
      # method returns undefined if it is successful and a non zero integer
      # error number if it fails.

      if ($TelemOK) {
         if ($cnTelem->Sql($sql)) {
            # If not the -1605 error message (duplicate records) then log it.
            if (!($cnTelem->Error() =~ /-1605/)) {
               print OUTPUTFILE localtime(time) . " Waconia SQL failure, Error: " . $cnTelem->Error() . ", SQL=" . $sql . "\n";
               print "  Waconia Err   : " . $cnTelem->Error() . "\n";
            } else {
               print "  Waconia:   SQL failed (probably the record already exists).\n";
            }
         }
      }
      
      # # Then write to PublicWeb
      # if ($TelemXOK) {
      #    if ($cnTelemX->Sql($sql)) {
      #       # If not the -1605 error message (duplicate records) then log it.
      #       if (!($cnTelemX->Error() =~ /-1605/)) {
      #          print OUTPUTFILE localtime(time) . " PublicWEB SQL failure, Error: " . $cnTelemX->Error() . ", SQL=" . $sql . "\n";
      #          print "  PublicWEB Err : " . $cnTelemX->Error() . "\n";
      #       } else {
      #          print "  PublicWEB: SQL failed (probably the record already exists).\n";
      #       }
      #    }
      # }

      # Write out the TimeMDY date to a special table that is used to quickly
      # populate the date combo box

      $sql = "INSERT INTO DaysGleaned (TimeMDY) VALUES ('$TheDate')";
      if ($TelemOK) {$cnTelem->Sql($sql);}
      # if ($TelemXOK) {$cnTelemX->Sql($sql);}
      if ($TelemX2OK) {$cnTelemX2->Sql($sql);}

   } else { # Here is the corresponding else of the if-values-are-reasonable block.
      print "$TheDate, $TheHour, $TheMin, $StationNumber, $TheCity, $TempAvg, $DewPoint, $WindStuff, $WindDir, $WindAvg, $WindMax, $BPAvg\n";
      print "  Record failed reasonable test\n";
   }
}

sub getpage {

   my $URL = $_[0];

   # The next 9 elements passed in are supplied by the Now array in the main
   # body of this script.

   my @Now = ($_[1],$_[2],$_[3],$_[4],$_[5],$_[6],$_[7],$_[8],$_[9]);

   # Get the page of data. Note the get function from 'LWP::Simple' returns
   # undefined on error, so check for errors as follows....

   $foundPage = True;
   unless (defined ($content = get $URL)) {
      print OUTPUTFILE "Timestamp = @Now[5,4,3,2,1]\n";
      print OUTPUTFILE "Could not get $URL\n";
      close(OUTPUTFILE);
      $foundPage = False;
   }
}

sub dir360 {

   my $dirstring = $_[0];
   my $WindDir;

   if    ($dirstring eq "N" ) { $WindDir = 360; } 
   elsif ($dirstring eq "NE") { $WindDir =  45; } 
   elsif ($dirstring eq "E" ) { $WindDir =  90; } 
   elsif ($dirstring eq "SE") { $WindDir = 135; } 
   elsif ($dirstring eq "S" ) { $WindDir = 180; } 
   elsif ($dirstring eq "SW") { $WindDir = 225; } 
   elsif ($dirstring eq "W" ) { $WindDir = 270; } 
   elsif ($dirstring eq "NW") { $WindDir = 315; } 
   else                       { $WindDir = "N/A"; }
   return $WindDir;
}

sub parseandwrite {

   $TheCity = $_[0];
   
   # This offset is used to handle city names that have two words, for example
   # "THE DALLES"

   my $offset = $_[1];

   # The station number

   $StationNumber = $_[2];

   # Split the page into lines

   @thelines = split (/\n/, $content);
   $nlines = @thelines;
   #print "$nlines \n";

   # Parse it

   foreach (@thelines) {

      if (/PDT/ or /PST/) {

         # Split the line into words.

         @thewords = split(" ", $_);

         # Set this timezone parameter so the call to ParseDate works.
         # 
         # Note that this does NOT override the TZ value that is set in 
         # C:\Perl\site\lib\Date\manip.pm
         # I only set it here in case it is removed in manip.pm, then I correct
         # for the 2 hour difference down below where I shift back to 
         # standard time.
         #
         # Changes for MN (from PST8PDT)
         # PST8PDT, Pacific time. MST7MDT, Mountain time. CST6CDT, Central time. EST5EDT, Eastern time
         #$TZ = "PST8PDT";
         $TZ = "CST6CDT";

         # Substitute, so can get that silly ":" in there. First grab the
         # first 4 characters. Note there is a space at the beginning of the
         # line so must go 1 to 5 not 0 to 4

         $thetime = substr($_,1,5);
         #print " thetime = $thetime \n";
	 
         # If 1000, 10:00, do a special form of the substitution.

         if ($thetime =~ /000/) { 
            $_ =~ s/000/0:00/;
         } else {
            $_ =~ s/00/:00/;
         }
         #print " Raw stuff = $_ \n";

         # Parse the date to get around the problem of their am/pm format.

         #print "TZ = $ENV{'TZ'}\n";
         $ParsedDate = ParseDate($_);
         #print "ParseDate = $ParsedDate\n";

         # Subtract an hour to get back to standard time.
         # And subtract two hours to get back to Pacific time because DateCalc
         # tries to present the result in the local time zone.

         if ($thewords[2] eq "PDT") {
            $PSTDate = DateCalc($ParsedDate,"- 3hours",\$err);
         } else {
            $PSTDate = DateCalc($ParsedDate,"- 2hours",\$err);
         }
         #print "PSTDate = $PSTDate \n";

         # Use this formating function to return a date string

         $TheDate = UnixDate($PSTDate, "%m/%d/%y");
 
         # Finally get the hour and min with a simple substring call

         $TheHour = substr($PSTDate,8,2) + 0;
         $TheMin = substr($PSTDate,11,2) + 0;
         #print " $TheDate, $TheHour, $TheMin \n";

      }

      # Look for the first occurrence of the city where the NOT AVBL string is
      # absent. Not that the LAST statement below will cause a break out of
      # the for loop; that is the mechanism I use to prevent the city from
      # being parsed twice if it shows more than once on the page.

      # Also if you're worried about getting the wrong date associated with a
      # city match farther down on the page, don't. If a city match is made in
      # a secondary report on the page, the date match will have been updated
      # also; it keeps trying to update the date for each line in the for loop.
      # So don't be afraid to use urls that have multiple reports on the page.

      if ((/$TheCity/) && !(/NOT AVBL/)) {
         # The sky cover is two words only for "LGT RAIN"

         if (/LGT RAIN/ || /LGT SNOW/) {$offset++}
         @thewords = split(" ", $_);

         # If there's a letter at the end of the pressure string, remove it.
         # Otherwise just take the whole thing.

         $PressureStuff = $thewords[6 + $offset];
         $PressureStuff =~ /[a-zA-Z]/g;
         
         if (defined ($FirstLetter = pos($PressureStuff))) {
            $BPAvg = substr($PressureStuff, 0, $FirstLetter - 1);
         } else {
            $BPAvg = $PressureStuff;
         }
	
         # Not using this anymore, but left here just for information... The
         # following use of substr returns all but the last character in the
         # target string (If the third parameter is negative, it leaves that
         # many off the end of the target string). Target, offset, length. 

         #$BPAvg = substr($thewords[6 + $offset],0,-1);

         $TempAvg = $thewords[2 + $offset];
         $DewPoint = $thewords[3 + $offset];
         $WindStuff = $thewords[5 + $offset];

         # This next block of code serves to interpret the windstuff string.

         # Check for CALM

         if ( ($WindStuff =~ /CALM/g) || ($WindStuff =~ /MISG/g) ) { 
            $WindAvg = 0;
            $WindMax = 0;
            $WindDir = "N/A";
         } else {

            # Split up the windstring into 4 parts.
            ($WindDir_string,$WindAvg,$Gust,$WindMax) = $WindStuff =~ /(\D+)(\d+)(\D+)?(\d+)?/;

            # Translate the winddirection string into a number.
            $WindDir = &dir360($WindDir_string);

            # If can't find the G (for Gust) in the wind string, just set the
            # max to the avg value
            if (!defined($Gust)) { 
               $WindMax = $WindAvg;
            }
         }

         # The following call, to write out to the database, only gets run if
         # the city match above was successful. This should avoid all problems
         # associated with writing garbage (undefined scalars) if no match is
         # found.

         WriteToMDB;

         # Use this "last" statement to exit the for loop. Because there are
         # multiple line with these city strings. This way we only use the
         # first find with data.

         last;
      }

   }

}


# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

# Main Program Body

# Open the log file. Note the '$!' variable returns system errors.

@Now = localtime(time);

open(OUTPUTFILE, '>>C:\Users\Jim\Documents\webcontent\waconia\rosielog.txt') or do {
   print "Timestamp = @Now[5,4,3,2,1]\n";
   print "Could not find rosielog.txt\n";
   die "On open (Jdm): $!";
};  # This semicolon is important here...

print OUTPUTFILE localtime(time) . " from getnoaa.pl " . "\n";

# Make database connection objects and point them at the mdb files. Set flags
# to 0 if there is a problem. 

# If on WhiteBear, use this DSN 
# $DSN = "DRIVER={Microsoft Access Driver (*.mdb)};DBQ=C:/Perl/proj/waconia/telem.mdb;UID=admin";
# If on Physics2, use this DSN
#$DSN = "DRIVER={Microsoft Access Driver (*.mdb)};DBQ=C:/code/python/scraper/telem.mdb;UID=admin";  # Home
$DSN = "DRIVER={Microsoft Access Driver (*.mdb)};DBQ=C:/Users/Jim/Documents/webcontent/waconia/data/telem.mdb;UID=admin";  # Physics
$TelemOK = 1;
if (!($cnTelem = new Win32::ODBC($DSN))){
   print OUTPUTFILE "Timestamp = @Now[5,4,3,2,1]\n";
   print OUTPUTFILE "Error connecting to $DSN\n";
   print OUTPUTFILE "Error: " . Win32::ODBC::Error() . "\n";
   $TelemOK = 0;
   warn "Error connecting to $DSN\n";
}

# $DSNX = "DRIVER={Microsoft Access Driver (*.mdb)};DBQ=//webfarm03/web/waconia/data/telem.mdb;UID=admin";
# $TelemXOK = 1;
# if (!($cnTelemX = new Win32::ODBC($DSNX))){
#     print OUTPUTFILE "Timestamp = @Now[5,4,3,2,1]\n";
#     print OUTPUTFILE "Error connecting to $DSNX\n";
#     print OUTPUTFILE "Error: " . Win32::ODBC::Error() . "\n";
#     $TelemXOK = 0;
#     warn "Error connecting to $DSNX\n";
# }


# Get the data and send it to the database.

#getpage("http://iwin.nws.noaa.gov/iwin/wa/hourly.html", @Now);
#getpage("http://www.weather.gov/view/prodsByState.php?state=WA&prodtype=hourly", @Now);
getpage("http://forecast.weather.gov/product.php?site=CRH&product=RWR&issuedby=WA", @Now);

if ($foundPage){
   parseandwrite("PORTLAND",    1, 51);
   parseandwrite("THE DALLES",  2, 52);
   parseandwrite("PASCO",       0, 53);
   parseandwrite("HERMISTON",   1, 54);
   parseandwrite("WALLA WALLA", 1, 55);
   parseandwrite("ELLENSBURG",  0, 56);
   parseandwrite("MOSES LAKE",  1, 57);
}

# getpage("http://www.wrh.noaa.gov/pendleton/data/text/pdxhwrsch.html", @Now);
# if ($foundPage){
#    parseandwrite("BOARDMAN",    1, 58);
#    parseandwrite("IRRIGON",     1, 60);
#    parseandwrite("UMATILLA",    1, 61);
#    parseandwrite("ROOSEVELT",   0, 62);
#    parseandwrite("CELILO",      1, 63);
#    parseandwrite("RUFUS",       1, 64);
# }

#getpage("http://iwin.nws.noaa.gov/iwin/or/hourly.html", @Now);
#getpage("http://www.weather.gov/view/prodsByState.php?state=OR&prodtype=hourly", @Now);
getpage("http://forecast.weather.gov/product.php?site=CRH&product=RWR&issuedby=OR", @Now);

if ($foundPage){
   parseandwrite("NORTH BEND",  1, 65);
}

# The following two lines of code are useful for testing the regular expression.
# It's just a test page on a local server.

#getpage("http://waconia.pnl.gov/TestWaconiaParse.htm", @Now);
#parseandwrite("RUFUS2",       1, 64);

# Close the database and file connections

if ($TelemOK)  {$cnTelem->Close();}
# if ($TelemXOK) {$cnTelemX->Close();}
if ($TelemX2OK) {$cnTelemX2->Close();}
close(OUTPUTFILE);