# getnoaa.pl
# Jim Miller, 9:31 PM Tue March 21, 2023

use LWP::Simple;
use Time::Local;
use Date::Calc qw(Decode_Date_US Now Today);
use Win32::ODBC;
use Date::Manip qw(ParseDate DateCalc UnixDate);
use DateTime;
use JSON;

#use strict;

my ($WindDir, $WindAvg, $WindMax, $TempAvg, $DewPoint, $BPAvg);

my $googleSheet_data;
my %shortNames;

# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

# Subroutines

sub cS {
   # clearString (cS)
   # This prepares values before writing to the Google sheet.
   # It translates the Null to an empty string.
   my $stringValue = $_[0];
   
   if ($stringValue eq "Null") {
      $stringValue = "";
   } 
   return $stringValue;
}

sub WriteToMDB {
   my $DateTimeStamp;
   my $LiteralDateTimeStamp;
   
   my ($Waconia_Year, $Waconia_Month, $Waconia_Day, $Waconia_Hour, $Waconia_Min, $Waconia_Sec);
   
   my @newRow_googleSheet;

   ($Waconia_Hour, $Waconia_Min, $Waconia_Sec) = Now();
   ($Waconia_Year, $Waconia_Month, $Waconia_Day) = Today();

   # The following scaler, PerlTime, is useful for having a single field to
   # characterize time. Note this function expects months to start at zero and
   # years to be relative to 1900.
   $PerlTime = timelocal(0,($Waconia_Min),($Waconia_Hour),$Waconia_Day,($Waconia_Month-1),($Waconia_Year));

   # Create the sql statement for inserting a record of data into the mdb file.

   # If there is a funny 24 in the time stamp, convert it to a more normal 0 to 23 convention...
   # (apparently this never gets used...)
   if ($TheHour == 24) {
      $TheDate = UnixDate( Date::Manip::DateCalc( $TheDate, "+ 1 days" ), "%m/%d/%Y"); 
      $TheHour = 0;
   } 
   
   # If there's no data (i.e. N/A), set it to Null string (for SQL).
   if ($WindDir eq "N/A") {$WindDir = "Null"};
   if ($WindAvg eq "N/A") {$WindAvg = "Null"};
   if ($WindMax eq "N/A") {$WindMax = "Null"};
   if ($TempAvg eq "N/A") {$TempAvg = "Null"};
   if ($DewPoint eq "N/A") {$DewPoint = "Null"};
   if ($BPAvg eq "N/A") {$BPAvg = "Null"};

   $DateTimeStamp = $TheDate . " " . $TheHour . ":" . $TheMin;
   $LiteralDateTimeStamp = $ParsedDate;
   
   # Note that in the VALUES part of the SQL, strings and dates are quoted, numbers are not.
   $sql = "INSERT INTO FifteenMinData (PerlTime, DateTimeStamp, LiteralDateTimeStamp, TimeMDY, TimeHr, TimeMin, StationNumber, StationName, ";
   $sql .= "WindDirection, WindSpeed, WindGust, ";
   $sql .= "TempAvg, DewPoint, Pressure) ";
   $sql .= "VALUES ($PerlTime,'$DateTimeStamp','$LiteralDateTimeStamp','$TheDate',$TheHour,$TheMin,$StationNumber,'$TheCity',";
   $sql .= "$WindDir,$WindAvg,$WindMax,";
   $sql .= "$TempAvg,$DewPoint,$BPAvg)";

   # print "SQL = $sql\n";

   # If all numeric values are reasonable, write the record...
   # Note: one of these raw values may be an "N/A" string. Those cases always evaluate true and do not block the write. 
   if ((($TempAvg >= -20) && ($TempAvg <=  120)) && 
       (($WindAvg >=   0) && ($WindAvg <=  110)) &&
       (($WindMax >=   0) && ($WindMax <=  130)) &&
       ((($BPAvg >= 28.50) && ($BPAvg <= 31.00)) || ($BPAvg == 0)) &&
       (($TheHour >= 0) && ($TheHour <= 24)) &&
       (($TheMin >= 0) && ($TheMin <= 59)) ) {

      # Print the intended write to the database.
      print "$TheDate, $TheHour, $TheMin, $StationNumber, $TheCity, $TempAvg, $DewPoint, $WindStuff, $WindDir, $WindAvg, $WindMax, $BPAvg\n";

      # Execute the sql

      # First write to the database on Waconia

      # I have added error checking here.... Note that the Sql
      # method returns undefined if it is successful and a non zero integer
      # error number if it fails.

      if ($TelemOK) {
         if ($cnTelem->Sql($sql)) {
            # If not the -1605 error message (duplicate records) then log it.
            if (!($cnTelem->Error() =~ /-1605/)) {
               print OUTPUTFILE localtime(time) . " Waconia SQL failure, Error: " . $cnTelem->Error() . ", SQL=" . $sql . "\n";
               print "  Waconia Err   : " . $cnTelem->Error() . "\n";
            } else {
               print "  Waconia:   SQL failed (probably the record already exists).\n";
            }
            
         } else {
            print "SQL write succeeded, ";
            
            # Prepare to add a new row to the googleSheet_data array.
            
            ($TheYear, $TheMonth, $TheDay) = Decode_Date_US($TheDate);
            
            my $pt_time = DateTime->new(
                year => $TheYear,
                month => $TheMonth,
                day => $TheDay,
                hour => $TheHour,
                minute => $TheMin,
                second => 0,
            );
            
            my $pt_string = $pt_time->strftime('%m/%d/%Y %H:%M:%S');
            print "PST= $pt_string, ";
            
            # This might have to be checked for daylight time in this NOAA data.
            my $utc_time = $pt_time->clone->add(hours => 8);
            
            my $utc_string = $utc_time->strftime('%m/%d/%Y %H:%M:%S');
            print "UTC= $utc_string\n";
                        
            @newRow_googleSheet = ( $shortNames{$TheCity}, $utc_string, cS($TempAvg), cS($DewPoint), cS($WindDir), cS($WindAvg), cS($WindMax), cS($BPAvg) );
            push @$googleSheet_data, \@newRow_googleSheet;
         }
      }
      
      # Write out the TimeMDY date to a special table that is used to quickly
      # populate the date combo box
      $sql = "INSERT INTO DaysGleaned (TimeMDY) VALUES ('$TheDate')";
      if ($TelemOK) {$cnTelem->Sql($sql);}

   } else { # Here is the corresponding else of the if-values-are-reasonable block.
      print "$TheDate, $TheHour, $TheMin, $StationNumber, $TheCity, $TempAvg, $DewPoint, $WindStuff, $WindDir, $WindAvg, $WindMax, $BPAvg\n";
      print "  Record failed reasonable test\n";
   }
}

sub getpage {
   my $URL = $_[0];

   # The next 9 elements passed in are supplied by the Now array in the main
   # body of this script.
   my @Now = ($_[1],$_[2],$_[3],$_[4],$_[5],$_[6],$_[7],$_[8],$_[9]);

   # Get the page of data. Note the get function from 'LWP::Simple' returns
   # undefined on error, so check for errors as follows....
   $foundPage = True;
   unless (defined ($content = get $URL)) {
      print OUTPUTFILE "Timestamp = @Now[5,4,3,2,1]\n";
      print OUTPUTFILE "Could not get $URL\n";
      close(OUTPUTFILE);
      $foundPage = False;
   }
}

sub dir360 {
   my $dirString = $_[0];
   my $dirAngle;

   if    ($dirString eq "N" ) { $dirAngle = 360; } 
   elsif ($dirString eq "NE") { $dirAngle =  45; } 
   elsif ($dirString eq "E" ) { $dirAngle =  90; } 
   elsif ($dirString eq "SE") { $dirAngle = 135; } 
   elsif ($dirString eq "S" ) { $dirAngle = 180; } 
   elsif ($dirString eq "SW") { $dirAngle = 225; } 
   elsif ($dirString eq "W" ) { $dirAngle = 270; } 
   elsif ($dirString eq "NW") { $dirAngle = 315; } 
   else                       { $dirAngle = "N/A"; }
   
   return $dirAngle;
}

sub parseandwrite {
   $TheCity = $_[0];
   
   # This offset is used to handle city names that have two words, for example "THE DALLES"
   my $offset = $_[1];

   $StationNumber = $_[2];

   # Split the page into lines
   @thelines = split (/\n/, $content);
   $nlines = @thelines;

   # Initialize the sensor values.
   $WindDir = "N/A";
   $WindAvg = "N/A";
   $WindMax = "N/A";
   $TempAvg = "N/A";
   $DewPoint = "N/A";
   $BPAvg = "N/A";

   # Parse it
   foreach (@thelines) {

      if (/PDT/ or /PST/) {

         # Split the line into words.
         @thewords = split(" ", $_);

         # Set this timezone parameter so the call to ParseDate works.
         # 
         # Note that this does NOT override the TZ value that is set in 
         # C:\Perl\site\lib\Date\manip.pm
         # I only set it here in case it is removed in manip.pm, then I correct
         # for the 2 hour difference down below where I shift back to 
         # standard time.
         #
         # Changes for MN (from PST8PDT)
         # PST8PDT, Pacific time. MST7MDT, Mountain time. CST6CDT, Central time. EST5EDT, Eastern time
         #$TZ = "PST8PDT";
         $TZ = "CST6CDT";

         # Substitute, so can get that silly ":" in there. First grab the
         # first 4 characters. Note there is a space at the beginning of the
         # line so must go 1 to 5 not 0 to 4

         $thetime = substr($_,1,5);
         #print " thetime = $thetime \n";
	 
         # If 1000, 10:00, do a special form of the substitution.
         if ($thetime =~ /000/) { 
            $_ =~ s/000/0:00/;
         } else {
            $_ =~ s/00/:00/;
         }
         #print " Raw stuff = $_ \n";

         # Parse the date to get around the problem of their am/pm format.
         #print "TZ = $ENV{'TZ'}\n";
         $ParsedDate = ParseDate($_);
         #print "ParseDate = $ParsedDate\n";

         # Subtract an hour to get back to standard time.
         # And subtract two hours to get back to Pacific time because DateCalc
         # tries to present the result in the local time zone.
         if ($thewords[2] eq "PDT") {
            $PSTDate = DateCalc($ParsedDate,"- 3hours",\$err);
         } else {
            $PSTDate = DateCalc($ParsedDate,"- 2hours",\$err);
         }
         #print "PSTDate = $PSTDate \n";

         # Use this formating function to return a date string
         $TheDate = UnixDate($PSTDate, "%m/%d/%y");
 
         # Finally get the hour and min with a simple substring call
         $TheHour = substr($PSTDate,8,2) + 0;
         $TheMin = substr($PSTDate,11,2) + 0;
         #print " $TheDate, $TheHour, $TheMin \n";
      }

      # Look for the first occurrence of the city where the NOT AVBL string is
      # absent. Not that the LAST statement below will cause a break out of
      # the for loop; that is the mechanism I use to prevent the city from
      # being parsed twice if it shows more than once on the page.

      # Also if you're worried about getting the wrong date associated with a
      # city match farther down on the page, don't. If a city match is made in
      # a secondary report on the page, the date match will have been updated
      # also; it keeps trying to update the date for each line in the for loop.
      # So don't be afraid to use urls that have multiple reports on the page.

      if ((/$TheCity/) && !(/NOT AVBL/)) {
         # The sky cover is two words only for "LGT RAIN"
         if (/LGT RAIN/ || /LGT SNOW/) {$offset++}
         @thewords = split(" ", $_);

         # If there's a letter at the end of the pressure string, remove it.
         # Otherwise just take the whole thing.
         $PressureStuff = $thewords[6 + $offset];
         $PressureStuff =~ /[a-zA-Z]/g;
         
         if (defined ($FirstLetter = pos($PressureStuff))) {
            $BPAvg = substr($PressureStuff, 0, $FirstLetter - 1);
         } else {
            $BPAvg = $PressureStuff;
         }
	
         # Not using this anymore, but left here as an example... The
         # following use of substr returns all but the last character in the
         # target string (If the third parameter is negative, it leaves that
         # many off the end of the target string). Target, offset, length. 
         #$BPAvg = substr($thewords[6 + $offset],0,-1);

         $TempAvg = $thewords[2 + $offset];
         $DewPoint = $thewords[3 + $offset];
         $WindStuff = $thewords[5 + $offset];

         # Interpret the windstuff string.

         # Check for CALM
         if ( ($WindStuff =~ /CALM/g) || ($WindStuff =~ /MISG/g) ) { 
            $WindAvg = 0;
            $WindMax = 0;
            $WindDir = "N/A";
            
         } else {
            # Split up the windstring into 4 parts.
            ($WindDir_string,$WindAvg,$Gust,$WindMax) = $WindStuff =~ /(\D+)(\d+)(\D+)?(\d+)?/;

            # Translate the winddirection string into a number.
            $WindDir = &dir360($WindDir_string);

            # If can't find the G (for Gust) in the wind string, just set the
            # max to the avg value
            if (!defined($Gust)) { 
               $WindMax = $WindAvg;
            }
         }

         # The following call, to write out to the database, only gets run if
         # the city match above was successful. 
         WriteToMDB;

         # Use this "last" statement to exit the for loop. Because there are
         # multiple line with these city strings. This way we only use the
         # first find with data.
         last;
      }

   }

}


# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# Main Program
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\


# Open the log file. Note the '$!' variable returns system errors.
@Now = localtime(time);

# Initialize the Google-sheet array that will add a row of data for each successful SQL write.
$googleSheet_data = [];

%shortNames = ('NORTH BEND' => 'NBend', 'MOSES LAKE'  => 'MLake', 
               'ELLENSBURG' => 'EBurg', 'WALLA WALLA' => 'Walla',
               'HERMISTON'  => 'Hermi', 'PASCO'       => 'Pasco',
               'THE DALLES' => 'Dalle', 'PORTLAND'    => 'PLand');

open(OUTPUTFILE, '>>C:\Users\Jim\Documents\webcontent\waconia\rosielog.txt') or do {
   print "Timestamp = @Now[5,4,3,2,1]\n";
   print "Could not find rosielog.txt\n";
   die "On open (Jdm): $!";
};  # This semicolon is important here...

print OUTPUTFILE localtime(time) . " from getnoaa.pl " . "\n";

# Make database connection objects and point them at the mdb files. Set flags
# to 0 if there is a problem. 
$DSN = "DRIVER={Microsoft Access Driver (*.mdb)};DBQ=C:/Users/Jim/Documents/webcontent/waconia/data/telem.mdb;UID=admin";

if (!($cnTelem = new Win32::ODBC($DSN))) {
   print OUTPUTFILE "Timestamp = @Now[5,4,3,2,1]\n";
   print OUTPUTFILE "Error connecting to $DSN\n";
   print OUTPUTFILE "Error: " . Win32::ODBC::Error() . "\n";
   $TelemOK = 0;
   warn "Error connecting to $DSN\n";
} else {
   $TelemOK = 1;
}

# Get the data and send it to the database.

#getpage("http://iwin.nws.noaa.gov/iwin/wa/hourly.html", @Now);
#getpage("http://www.weather.gov/view/prodsByState.php?state=WA&prodtype=hourly", @Now);
getpage("http://forecast.weather.gov/product.php?site=CRH&product=RWR&issuedby=WA", @Now);

if ($foundPage) {
   parseandwrite("PORTLAND",    1, 51);
   parseandwrite("THE DALLES",  2, 52);
   parseandwrite("PASCO",       0, 53);
   parseandwrite("HERMISTON",   1, 54);
   parseandwrite("WALLA WALLA", 1, 55);
   parseandwrite("ELLENSBURG",  0, 56);
   parseandwrite("MOSES LAKE",  1, 57);
}

# getpage("http://www.wrh.noaa.gov/pendleton/data/text/pdxhwrsch.html", @Now);
# if ($foundPage){
#    parseandwrite("BOARDMAN",    1, 58);
#    parseandwrite("IRRIGON",     1, 60);
#    parseandwrite("UMATILLA",    1, 61);
#    parseandwrite("ROOSEVELT",   0, 62);
#    parseandwrite("CELILO",      1, 63);
#    parseandwrite("RUFUS",       1, 64);
# }

#getpage("http://iwin.nws.noaa.gov/iwin/or/hourly.html", @Now);
#getpage("http://www.weather.gov/view/prodsByState.php?state=OR&prodtype=hourly", @Now);
getpage("http://forecast.weather.gov/product.php?site=CRH&product=RWR&issuedby=OR", @Now);

if ($foundPage) {
   parseandwrite("NORTH BEND",  1, 65);
}

# The following two lines of code are useful for testing the regular expression.
# It's just a test page on a local server.

#getpage("http://waconia.pnl.gov/TestWaconiaParse.htm", @Now);
#parseandwrite("RUFUS2",       1, 64);

# Close the database and file connections

if ($TelemOK) {$cnTelem->Close();}
close(OUTPUTFILE);

# Send the array of successful writes to the Google sheet.
my $row_count = scalar @$googleSheet_data;
print "row_count = $row_count \n";
if ($row_count > 0) {
      
   # Put the data into a JSON data structure.
   my $json_data = {
       'sheetName' => 'noaa',
       'weatherData' => $googleSheet_data
   };

   # Convert the JSON data to a string.
   my $json_string = encode_json($json_data);

   # Write the string to the file.
   open(my $fh, '>', 'noaadata.json') or die "Can't open file: $!";
   print $fh $json_string;
   close $fh;

   # Run the Python program that reads the file and Posts the JSON to the spreadsheet. Specify the file as an argument.
   my $cmd = "C:\\Users\\Jim\\Documents\\webcontent\\waconia\\pythonPostToSheet.py noaadata.json";
   system($cmd);
}