#!/usr/local/bin/perl -w
use strict;
use LWP::UserAgent;
# Downloads the newest revision of every wiki page listed in the ENTRIES
# file (one page id per line, read from the current directory) and stores
# the raw edit-box text of each page in data/<page id>.
#
# For each page the script makes two requests: the history page, to find
# the highest revision number, and then the edit page for that revision.
# Any HTTP error or file error is fatal.

my $ua = LWP::UserAgent->new;

# Set attributes on UA
$ua->timeout(30);
$ua->agent("SJK downloading Year in Review: " . $ua->agent);
$ua->env_proxy();

# fetch each article
open my $entries_fh, '<', 'ENTRIES'
    or die "can't open ENTRIES file: $!";

ENTRY:
while (my $entry = <$entries_fh>) {
    chomp $entry;
    next ENTRY if $entry eq '';    # a blank line names no article

    # Step 1: the history page tells us which revision is the newest.
    my $history_url
        = "http://www.wikipedia.com/wiki.cgi?action=history&id=$entry";
    my $maxrev = latest_revision(fetch_content($ua, $history_url));
    warn "No revision number found in history of '$entry'; "
        . "requesting revision 0\n"
        if $maxrev == 0;

    # Step 2: the edit page of that revision carries the page source
    # inside its <textarea>.
    my $edit_url
        = "http://www.wikipedia.com/wiki.cgi?action=edit&revision=$maxrev&id=$entry";
    my $edit_page = fetch_content($ua, $edit_url);

    # Three-argument open: the entry name can never be taken as an open
    # mode. An entry containing "/" needs a matching sub-directory under
    # data/, otherwise this dies.
    my $out_path = "data/$entry";
    open my $out_fh, '>', $out_path
        or die "Can't open $out_path: $!";

    # m{} delimiters so the "/" in "</textarea>" needs no escaping; /s lets
    # "." cross newlines because the page text spans many lines.
    if ($edit_page =~ m{<textarea[^>]*>(.*)</textarea>}s) {
        my $out = "#YEAR [[$entry]] REV=$maxrev\n" . $1;

        # NOTE(review): the published copy of this script showed the no-op
        # s/'/'/g here, most likely because an apostrophe entity on the
        # left-hand side had been rendered as a plain quote. Decoding the
        # apostrophe entities is the presumed original intent -- confirm.
        $out =~ s/&(?:#0*39|#[xX]0*27|apos);/'/g;

        print {$out_fh} $out
            or die "Can't write $out_path: $!";
    }
    else {
        # The file is still created (empty), as before; just say why.
        warn "No <textarea> found in edit page for '$entry'\n";
    }

    # Buffered write errors only surface at close, so check it.
    close $out_fh
        or die "Can't close $out_path: $!";
}

close $entries_fh;

# Announce $url on STDOUT, GET it with the user agent $ua, and return the
# response body. Dies with the HTTP status line if the response is an error.
sub fetch_content {
    my ($ua, $url) = @_;

    print "Getting $url...\n";
    my $request  = HTTP::Request->new('GET', $url);
    my $response = $ua->request($request);
    if ($response->is_error) {
        die $response->status_line . "\n";
    }
    return $response->content;
}

# Return the largest N found in any "Revision N: <a href=" marker in the
# history page text $html, or 0 when there is no such marker.
sub latest_revision {
    my ($html) = @_;

    my $maxrev = 0;
    # [0-9]+ (not *) so an empty capture is never compared numerically.
    while ($html =~ m{Revision ([0-9]+): <a href=}g) {
        $maxrev = $1 if $1 > $maxrev;
    }
    return $maxrev;
}
# See also: Simon J Kissane