--- AnyEvent-HTTP/HTTP.pm 2008/07/02 01:23:41 1.23 +++ AnyEvent-HTTP/HTTP.pm 2008/10/23 02:46:20 1.30 @@ -50,7 +50,7 @@ use base Exporter::; -our $VERSION = '1.03'; +our $VERSION = '1.05'; our @EXPORT = qw(http_get http_post http_head http_request); @@ -73,23 +73,29 @@ =item http_get $url, key => value..., $cb->($data, $headers) Executes an HTTP-GET request. See the http_request function for details on -additional parameters. +additional parameters and the return value. =item http_head $url, key => value..., $cb->($data, $headers) -Executes an HTTP-HEAD request. See the http_request function for details on -additional parameters. +Executes an HTTP-HEAD request. See the http_request function for details +on additional parameters and the return value. =item http_post $url, $body, key => value..., $cb->($data, $headers) -Executes an HTTP-POST request with a request body of C<$bod>. See the -http_request function for details on additional parameters. +Executes an HTTP-POST request with a request body of C<$body>. See the +http_request function for details on additional parameters and the return +value. =item http_request $method => $url, key => value..., $cb->($data, $headers) Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL must be an absolute http or https URL. +When called in void context, nothing is returned. In other contexts, +C<http_request> returns a "cancellation guard" - you have to keep the +object alive at least until the callback gets called. If the object gets +destroyed before the callback is called, the request will be cancelled. + The callback will be called with the response data as first argument (or C<undef> if it wasn't available due to errors), and a hash-ref with response headers as second argument. @@ -191,6 +197,16 @@ } ; +Example: make another simple HTTP GET request, but immediately try to +cancel it. 
+ + my $request = http_request GET => "http://www.nethype.de/", sub { + my ($body, $hdr) = @_; + print "$body\n"; + }; + + undef $request; + =cut sub _slot_schedule; @@ -276,8 +292,13 @@ my @cookie; while (my ($chost, $v) = each %$jar) { - next unless $chost eq substr $uhost, -length $chost; - next unless $chost =~ /^\./; + if ($chost =~ /^\./) { + next unless $chost eq substr $uhost, -length $chost; + } elsif ($chost =~ /\./) { + next unless $chost eq $uhost; + } else { + next; + } while (my ($cpath, $v) = each %$v) { next unless $cpath eq substr $upath, 0, length $cpath; @@ -357,7 +378,7 @@ # status line $state{handle}->push_read (line => qr/\015?\012/, sub { - $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix + $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url })); my %hdr = ( # response headers @@ -371,7 +392,8 @@ $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { for ("$_[1]\012") { # we support spaces in field names, as lotus domino - # creates them. + # creates them (actually spaces around seperators + # are strictly allowed in http, they are a security issue). $hdr{lc $1} .= "\x00$2" while /\G ([^:\000-\037]+): @@ -388,6 +410,10 @@ for values %hdr; my $finish = sub { + # TODO: use destroy method, when/if available + #$state{handle}->destroy; + $state{handle}->on_eof (undef); + $state{handle}->on_error (undef); %state = (); # set-cookie processing @@ -397,16 +423,22 @@ my ($name, $value) = split /=/, $cookie, 2; my %kv = (value => $value, map { split /=/, $_, 2 } @arg); - my $cdom = (delete $kv{domain}) || $uhost; + my $cdom; my $cpath = (delete $kv{path}) || "/"; - - $cdom =~ s/^.?/./; # make sure it starts with a "." - next if $cdom =~ /\.$/; + if (exists $kv{domain}) { + $cdom = delete $kv{domain}; - # this is not rfc-like and not netscape-like. go figure. 
- my $ndots = $cdom =~ y/.//; - next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2); + $cdom =~ s/^\.?/./; # make sure it starts with a "." + + next if $cdom =~ /\.$/; + + # this is not rfc-like and not netscape-like. go figure. + my $ndots = $cdom =~ y/.//; + next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2); + } else { + $cdom = $uhost; + } # store it $arg{cookie_jar}{version} = 1; @@ -414,19 +446,31 @@ } } - if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") { - # microsoft and other assholes don't give a shit for following standards, - # try to support a common form of broken Location header. - $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; + # microsoft and other shitheads don't give a shit for following standards, + # try to support some common forms of broken Location headers. + if ($_[1]{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) { + $_[1]{location} =~ s/^\.\/+//; + + my $url = "$scheme://$uhost:$uport"; + + unless ($_[1]{location} =~ s/^\///) { + $url .= $upath; + $url =~ s/\/[^\/]*$//; + } + $_[1]{location} = "$url/$_[1]{location}"; + } + + if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") { # apparently, mozilla et al. just change POST to GET here # more research is needed before we do the same - http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); } elsif ($_[1]{Status} == 303 && $recurse) { - $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; - - http_request (GET => $_[1]{location}, %arg, recurse => $recurse - 1, $cb); + # even http/1.1 is unclear on how to mutate the method + $method = "GET" unless $method eq "HEAD"; + http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb); + } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) { + http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb); } else { $cb->($_[0], $_[1]); }