#!/usr/bin/perl -w
#
# randastro
#
# Create a csv file of all of the Astronomy Picture of the Day
# images from the start until present.
#
# This is typically called out from the user's crontab with something like:
#   30 5 * * * .../randastro ~/.rootwindow/randastro2.csv
#
# The csv file is both input and output: rows already present are kept,
# pages not yet listed are fetched (at most $NFetch per run) and the file
# is rewritten in sorted order.
#
use strict;
use warnings;
use LWP::Simple;

our ($BaseURL, %Csv, $NFetch, $SYear, $SMon, $SDay, $Version);
$BaseURL = 'http://antwrp.gsfc.nasa.gov/apod';
$Version = 'randastro v0.01 regan@ao.com';
$SYear   = 95;      # First year to scan, as years since 1900 (localtime style)
$SMon    = 6;       # First month to scan, 1-based
$SDay    = 20;      # First day of that month to scan
$NFetch  = 20;      # Don't get too many images in a single run

###
### Main program
###

if (!defined($ARGV[0])) {
    print "Usage: randastro file.csv\n";
    exit 1;
}
my $csv_file = $ARGV[0];

# Read the existing CSV file.  Each row is:
#   url,0,0,url.html
# %Csv maps the page URL (4th field) to the image URL (1st field).
# A file that cannot be opened is not an error: on the first run there is
# simply nothing to load yet.
if (open(my $in, '<', $csv_file)) {
    while (my $line = <$in>) {
        chomp $line;
        my @fields = split /,/, $line;
        next if @fields < 4;        # Blank or damaged row; nothing to key on
        $Csv{$fields[3]} = $fields[0];
    }
    close $in;
}

# Cycle through all possible URLs, oldest first.
my @montab = qw(31 28 31 30 31 30 31 31 30 31 30 31);
my @tm     = localtime();           # [3] = mday, [4] = month (0-based), [5] = year - 1900
my $fetch  = 0;                     # Pages requested so far in this run

YEAR:
for my $year ($SYear .. $tm[5]) {
    my $full_year = $year + 1900;
    my $is_leap   = ($full_year % 4 == 0)
        && ($full_year % 100 != 0 || $full_year % 400 == 0);
    $montab[1] = $is_leap ? 29 : 28;

    for my $mon (0 .. 11) {
        for my $day (1 .. $montab[$mon]) {
            # Stop as soon as this run's fetch budget is used up.
            last YEAR if $fetch >= $NFetch;

            # Today is the last date considered; it is still attempted
            # below before the scan stops.
            my $is_today = $year == $tm[5] && $mon == $tm[4] && $day == $tm[3];

            # Dates before the configured start only exist in the start
            # year.  The month/day comparison must be grouped under the
            # year test, otherwise the early days of the start month would
            # be skipped in every year.
            my $before_start = $year == $SYear
                && ($mon + 1 < $SMon || ($mon + 1 == $SMon && $day < $SDay));

            # 2006-05-29 is deliberately never requested (kept from the
            # original script; presumably no page exists for that date).
            my $skip_day = $year % 100 == 6 && $mon + 1 == 5 && $day == 29;

            if (!$before_start && !$skip_day) {
                # Pass the base URL as data so a '%' in it cannot be taken
                # as a format directive.
                my $urlhtml = sprintf '%s/ap%02d%02d%02d.html',
                    $BaseURL, $year % 100, $mon + 1, $day;
                #print "Checking $urlhtml\n";
                if (!defined($Csv{$urlhtml})) {
                    NewImage($urlhtml);
                    $fetch++;       # Counted whether or not the fetch succeeded
                }
            }

            last YEAR if $is_today;
        }
    }
}

# Write new csv file.
open(my $out, '>', $csv_file) or die "Cannot open $csv_file for write: $!";
for my $urlhtml (sort keys %Csv) {
    print {$out} "$Csv{$urlhtml},0,0,$urlhtml\n";
}
# Buffered write errors only surface at close, so check it.
close($out) or die "Cannot close $csv_file: $!";

###
### NewImage
###
### Get a new image file based upon the URL.
### Takes the URL of one day's html page.  Prints progress to STDOUT,
### downloads the page and, when an image reference is found in it,
### stores the image URL in %Csv under the page URL.  Nothing is stored
### when the download fails or the page has no inline image.
###
sub NewImage {
    my ($page_url) = @_;

    print "Fetch $page_url\n";
    my $html = get($page_url);

    # Only a non-empty download that embeds an image is worth recording.
    my $have_page = defined($html) && length($html) > 0;
    if ($have_page && $html =~ /img src\s*=\s*"(.*?)"/im) {
        # Start with the inline image; its path is taken relative to $BaseURL.
        my $image_url = join('/', $BaseURL, $1);
        print "Url is $image_url\n";

        # A link into image/ is preferred over the inline picture.
        if ($html =~ m#a href="(image/.*?)"#im) {
            $image_url = join('/', $BaseURL, $1);
            print "Use high resolution url of $image_url\n";
        }

        $Csv{$page_url} = $image_url;
    }
}