--- AnyEvent-HTTP/HTTP.pm 2011/01/04 08:17:59 1.92 +++ AnyEvent-HTTP/HTTP.pm 2011/02/24 15:09:03 1.104 @@ -17,7 +17,7 @@ This module implements a simple, stateless and non-blocking HTTP client. It supports GET, POST and other request methods, cookies and more, -all on a very low level. It can follow redirects supports proxies and +all on a very low level. It can follow redirects, supports proxies, and automatically limits the number of connections to the values specified in the RFC. @@ -48,7 +48,7 @@ use base Exporter::; -our $VERSION = '2.0'; +our $VERSION = '2.1'; our @EXPORT = qw(http_get http_post http_head http_request); @@ -171,6 +171,9 @@ appropriate for your program - I wouldn't be surprised if the default AnyEvent string gets blocked by webservers sooner or later. +Also, make sure that your header names and values do not contain any +embedded newlines. + =item timeout => $seconds The time-out to use for various stages - each connect attempt will reset @@ -181,11 +184,14 @@ =item proxy => [$host, $port[, $scheme]] or undef -Use the given http proxy for all requests. If not specified, then the -default proxy (as specified by C<$ENV{http_proxy}>) is used. +Use the given http proxy for all requests, or no proxy if C<undef> is +used. C<$scheme> must be either missing or must be C<http> for HTTP. +If not specified, then the default proxy is used (see +C<AnyEvent::HTTP::set_proxy>). + =item body => $string The request body, usually empty. Will be sent as-is (future versions of @@ -531,7 +537,7 @@ \G\s* (?: expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) - | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )? + | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )? ) }gcxsi ) { @@ -548,7 +554,7 @@ $value =~ s/\\(.)/$1/gs; } - push @kv, lc $name, $value; + push @kv, @kv ? 
lc $name : $name, $value; last unless /\G\s*;/gc; } @@ -711,11 +717,11 @@ return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" }) if $recurse < 0; - my $proxy = $arg{proxy} || $PROXY; + my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY; my $timeout = $arg{timeout} || $TIMEOUT; my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment - $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?|; + $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|; $uscheme = lc $uscheme; @@ -772,14 +778,14 @@ my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/; # default value for keepalive is true iff the request is for an idempotent method - my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : $idempotent; - my $keepalive10 = exists $arg{keepalive10} ? $arg{keepalive10} : !$proxy; - my $keptalive; # true if this is actually a recycled connection + my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent; + my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy; + my $was_persistent; # true if this is actually a recycled connection # the key to use in the keepalive cache my $ka_key = "$uhost\x00$arg{sessionid}"; - $hdr{connection} = ($keepalive ? $keepalive10 ? "keep-alive " : "" : "close ") . "Te"; #1.1 + $hdr{connection} = ($persistent ? $keepalive ? "keep-alive " : "" : "close ") . "Te"; #1.1 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 my %state = (connect_guard => 1); @@ -873,11 +879,11 @@ } } - my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) + my $finish = sub { # ($data, $err_status, $err_reason[, $persistent]) if ($state{handle}) { # handle keepalive if ( - $keepalive + $persistent && $_[3] && ($hdr{HTTPVersion} < 1.1 ? 
$hdr{connection} =~ /\bkeep-?alive\b/i @@ -906,13 +912,17 @@ # we ignore any errors, as it is very common to receive # Content-Length != 0 but no actual body # we also access %hdr, as $_[1] might be an erro - http_request ( - $method => $hdr{location}, - %arg, - recurse => $recurse - 1, - Redirect => [$_[0], \%hdr], - $cb - ); + $state{recurse} = + http_request ( + $method => $hdr{location}, + %arg, + recurse => $recurse - 1, + Redirect => [$_[0], \%hdr], + sub { + %state = (); + &$cb + }, + ); } else { $cb->($_[0], \%hdr); } @@ -1034,17 +1044,22 @@ # if keepalive is enabled, then the server closing the connection # before a response can happen legally - we retry on idempotent methods. - if ($keptalive && $idempotent) { + if ($was_persistent && $idempotent) { my $old_eof = $hdl->{on_eof}; $hdl->{on_eof} = sub { _destroy_state %state; - http_request ( - $method => $url, - %arg, - keepalive => 0, - $cb - ); + %state = (); + $state{recurse} = + http_request ( + $method => $url, + %arg, + keepalive => 0, + sub { + %state = (); + &$cb + } + ); }; $hdl->on_read (sub { return unless %state; @@ -1062,13 +1077,14 @@ my $prepare_handle = sub { my ($hdl) = $state{handle}; - $hdl->timeout ($timeout); $hdl->on_error (sub { _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] }; }); $hdl->on_eof (sub { _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }; }); + $hdl->timeout_reset; + $hdl->timeout ($timeout); }; # connected to proxy (or origin server) @@ -1119,11 +1135,18 @@ # try to use an existing keepalive connection, but only if we, ourselves, plan # on a keepalive request (in theory, this should be a separate config option). - if ($keepalive && $KA_CACHE{$ka_key}) { - $keptalive = 1; + if ($persistent && $KA_CACHE{$ka_key}) { + $was_persistent = 1; + $state{handle} = ka_fetch $ka_key; + $state{handle}->destroyed + and die "got a destructed habndle. 
pah\n";#d# $prepare_handle->(); + $state{handle}->destroyed + and die "got a destructed habndle. pa2\n";#d# $handle_actual_request->(); + $state{handle}->destroyed + and die "got a destructed habndle. pa3\n";#d# } else { my $tcp_connect = $arg{tcp_connect} @@ -1174,6 +1197,10 @@ To clear an already-set proxy, use C<undef>. +When AnyEvent::HTTP is loaded for the first time it will query the +default proxy from the operating system, currently by looking at +C<$ENV{http_proxy}>. + =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] Remove all cookies from the cookie jar that have been expired. If @@ -1327,7 +1354,109 @@ set_proxy $ENV{http_proxy}; }; -=head2 SOCKS PROXIES +=head2 SHOWCASE + +This section contains some more elaborate "real-world" examples or code +snippets. + +=head2 HTTP/1.1 FILE DOWNLOAD + +Downloading files with HTTP can be quite tricky, especially when something +goes wrong and you want to resume. + +Here is a function that initiates and resumes a download. It uses the +last modified time to check for file content changes, and works with many +HTTP/1.0 servers as well, and usually falls back to a complete re-download +on older servers. + +It calls the completion callback with either C<undef>, which means a +nonretryable error occurred, C<0> when the download was partial and should +be retried, and C<1> if it was successful. 
+
+   use AnyEvent::HTTP;
+
+   # download $url, $file, $cb->($ok, $hdr): fetch $url into $file, resuming a
+   # partial file; $ok is 1 on success, 0 when a retry makes sense, else undef.
+   sub download($$$) {
+      my ($url, $file, $cb) = @_;
+
+      open my $fh, "+>>", $file # read/append: also creates a not-yet-existing file
+         or die "$file: $!";
+
+      my %hdr;
+      my $ofs = 0;
+
+      # non-empty file: only request the missing tail, and only if unchanged
+      if (stat $fh and -s _) {
+         $ofs = -s _;
+         $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
+         $hdr{"range"} = "bytes=$ofs-";
+      }
+
+      http_get $url,
+         headers   => \%hdr,
+         on_header => sub {
+            my ($hdr) = @_;
+
+            if ($hdr->{Status} == 200 && $ofs) {
+               # resume failed
+               truncate $fh, $ofs = 0;
+            }
+
+            sysseek $fh, $ofs, 0;
+
+            1
+         },
+         on_body   => sub {
+            my ($data, $hdr) = @_;
+
+            if ($hdr->{Status} =~ /^2/) {
+               length $data == syswrite $fh, $data
+                  or return; # abort on write errors
+            }
+
+            1
+         },
+         sub {
+            my (undef, $hdr) = @_;
+
+            my $status = $hdr->{Status};
+
+            if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
+               utime $time, $time, $fh; # argument order is atime, mtime, then the file
+            }
+
+            if ($status == 200 || $status == 206 || $status == 416) {
+               # download ok || resume ok || file already fully downloaded
+               $cb->(1, $hdr);
+
+            } elsif ($status == 412) {
+               # file has changed while resuming, delete and retry
+               unlink $file;
+               $cb->(0, $hdr);
+
+            } elsif ($status == 500 or $status == 503 or $status =~ /^59/) {
+               # retry later
+               $cb->(0, $hdr);
+
+            } else {
+               $cb->(undef, $hdr);
+            }
+         }
+      ;
+   }
+
+   download "http://server/somelargefile", "/tmp/somelargefile", sub {
+      if ($_[0]) {
+         print "OK!\n";
+      } elsif (defined $_[0]) {
+         print "please retry later\n";
+      } else {
+         print "ERROR\n";
+      }
+   };
+
+=head3 SOCKS PROXIES
 
 Socks proxies are not directly supported by AnyEvent::HTTP. You can
 compile your perl to support socks, or use an external program such as