--- AnyEvent-HTTP/HTTP.pm 2008/06/06 16:23:57 1.18 +++ AnyEvent-HTTP/HTTP.pm 2008/07/02 01:23:41 1.23 @@ -50,7 +50,7 @@ use base Exporter::; -our $VERSION = '1.01'; +our $VERSION = '1.03'; our @EXPORT = qw(http_get http_post http_head http_request); @@ -95,10 +95,13 @@ response headers as second argument. All the headers in that hash are lowercased. In addition to the response -headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and -C<Reason> contain the three parts of the HTTP Status-Line of the same -name. If the server sends a header multiple lines, then their contents -will be joined together with C<\x00>. +headers, the "pseudo-headers" C<HTTPVersion>, C<Status> and C<Reason> +contain the three parts of the HTTP Status-Line of the same name. The +pseudo-header C<URL> contains the original URL (which can differ from the +requested URL when following redirects). + +If the server sends a header multiple lines, then their contents will be +joined together with C<\x00>. If an internal error occurs, such as not being able to resolve a hostname, then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599> @@ -234,9 +237,9 @@ } } - my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE; + my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; - return $cb->(undef, { Status => 599, Reason => "recursion limit reached" }) + return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url }) if $recurse < 0; my $proxy = $arg{proxy} || $PROXY; @@ -251,12 +254,12 @@ my $uport = $scheme eq "http" ? 80 : $scheme eq "https" ? 443 - : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); + : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url }); $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic $authority =~ /^(?: .*\@ )? 
([^\@:]+) (?: : (\d+) )?$/x - or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); + or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url }); my $uhost = $1; $uport = $2 if defined $2; @@ -311,7 +314,7 @@ $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { $state{fh} = shift - or return $cb->(undef, { Status => 599, Reason => "$!" }); + or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url }); delete $state{connect_guard}; # reduce memory usage, save a tree @@ -335,11 +338,11 @@ $state{handle}->on_error (sub { my $errno = "$!"; %state = (); - $cb->(undef, { Status => 599, Reason => $errno }); + $cb->(undef, { Status => 599, Reason => $errno, URL => $url }); }); $state{handle}->on_eof (sub { %state = (); - $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); + $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url }); }); # send request @@ -355,12 +358,13 @@ # status line $state{handle}->push_read (line => qr/\015?\012/, sub { $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix - or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" })); + or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url })); my %hdr = ( # response headers HTTPVersion => "\x00$1", Status => "\x00$2", Reason => "\x00$3", + URL => "\x00$url" ); # headers, could be optimized a bit @@ -377,7 +381,7 @@ /gxc; /\G$/ - or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); + or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url })); } substr $_, 0, 1, "" @@ -410,12 +414,19 @@ } } - if ($_[1]{Status} =~ /^30[12]$/ && $recurse) { + if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") { # microsoft and other assholes don't give a shit for following standards, # try to 
support a common form of broken Location header. $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; + # apparently, mozilla et al. just change POST to GET here + # more research is needed before we do the same + http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); + } elsif ($_[1]{Status} == 303 && $recurse) { + $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; + + http_request (GET => $_[1]{location}, %arg, recurse => $recurse - 1, $cb); } else { $cb->($_[0], $_[1]); } @@ -464,7 +475,8 @@ } sub http_post($$@) { - unshift @_, "POST", "body"; + my $url = shift; + unshift @_, "POST", $url, "body"; &http_request }