--- AnyEvent-HTTP/HTTP.pm 2011/01/11 23:49:37 1.94 +++ AnyEvent-HTTP/HTTP.pm 2013/05/17 07:19:23 1.116 @@ -48,7 +48,7 @@ use base Exporter::; -our $VERSION = '2.01'; +our $VERSION = '2.15'; our @EXPORT = qw(http_get http_post http_head http_request); @@ -125,7 +125,7 @@ =over 4 -=item 595 - errors during connection etsbalishment, proxy handshake. +=item 595 - errors during connection establishment, proxy handshake. =item 596 - errors during TLS negotiation, request sending and header processing. @@ -156,8 +156,8 @@ =item recurse => $count (default: $MAX_RECURSE) -Whether to recurse requests or not, e.g. on redirects, authentication -retries and so on, and how often to do so. +Whether to recurse requests or not, e.g. on redirects, authentication and +other retries and so on, and how often to do so. =item headers => hashref @@ -171,6 +171,9 @@ appropriate for your program - I wouldn't be surprised if the default AnyEvent string gets blocked by webservers sooner or later. +Also, make sure that your header names and values do not contain any +embedded newlines. + =item timeout => $seconds The time-out to use for various stages - each connect attempt will reset @@ -181,11 +184,14 @@ =item proxy => [$host, $port[, $scheme]] or undef -Use the given http proxy for all requests. If not specified, then the -default proxy (as specified by C<$ENV{http_proxy}>) is used. +Use the given http proxy for all requests, or no proxy if C<undef> is +used. C<$scheme> must be either missing or must be C<http> for HTTP. +If not specified, then the default proxy is used (see +C<AnyEvent::HTTP::set_proxy>). + =item body => $string The request body, usually empty. Will be sent as-is (future versions of @@ -380,7 +386,7 @@ timeout of 30 seconds. 
http_request - GET => "https://www.google.com", + HEAD => "https://www.google.com", headers => { "user-agent" => "MySearchClient 1.0" }, timeout => 30, sub { @@ -531,7 +537,7 @@ \G\s* (?: expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) - | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )? + | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )? ) }gcxsi ) { @@ -548,7 +554,7 @@ $value =~ s/\\(.)/$1/gs; } - push @kv, lc $name, $value; + push @kv, @kv ? lc $name : $name, $value; last unless /\G\s*;/gc; } @@ -685,6 +691,44 @@ () } +our %IDEMPOTENT = ( + DELETE => 1, + GET => 1, + HEAD => 1, + OPTIONS => 1, + PUT => 1, + TRACE => 1, + + ACL => 1, + "BASELINE-CONTROL" => 1, + BIND => 1, + CHECKIN => 1, + CHECKOUT => 1, + COPY => 1, + LABEL => 1, + LINK => 1, + MERGE => 1, + MKACTIVITY => 1, + MKCALENDAR => 1, + MKCOL => 1, + MKREDIRECTREF => 1, + MKWORKSPACE => 1, + MOVE => 1, + ORDERPATCH => 1, + PROPFIND => 1, + PROPPATCH => 1, + REBIND => 1, + REPORT => 1, + SEARCH => 1, + UNBIND => 1, + UNCHECKOUT => 1, + UNLINK => 1, + UNLOCK => 1, + UPDATE => 1, + UPDATEREDIRECTREF => 1, + "VERSION-CONTROL" => 1, +); + sub http_request($$@) { my $cb = pop; my ($method, $url, %arg) = @_; @@ -711,11 +755,11 @@ return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" }) if $recurse < 0; - my $proxy = $arg{proxy} || $PROXY; + my $proxy = exists $arg{proxy} ? 
$arg{proxy} : $PROXY; my $timeout = $arg{timeout} || $TIMEOUT; my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment - $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?|; + $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|; $uscheme = lc $uscheme; @@ -769,17 +813,17 @@ $hdr{"content-length"} = length $arg{body} if length $arg{body} || $method ne "GET"; - my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/; + my $idempotent = $IDEMPOTENT{$method}; # default value for keepalive is true iff the request is for an idempotent method - my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : $idempotent; - my $keepalive10 = exists $arg{keepalive10} ? $arg{keepalive10} : !$proxy; - my $keptalive; # true if this is actually a recycled connection + my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent; + my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy; + my $was_persistent; # true if this is actually a recycled connection # the key to use in the keepalive cache - my $ka_key = "$uhost\x00$arg{sessionid}"; + my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}"; - $hdr{connection} = ($keepalive ? $keepalive10 ? "keep-alive " : "" : "close ") . "Te"; #1.1 + $hdr{connection} = ($persistent ? $keepalive ? "keep-alive " : "" : "close ") . "Te"; #1.1 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 my %state = (connect_guard => 1); @@ -873,11 +917,11 @@ } } - my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) + my $finish = sub { # ($data, $err_status, $err_reason[, $persistent]) if ($state{handle}) { # handle keepalive if ( - $keepalive + $persistent && $_[3] && ($hdr{HTTPVersion} < 1.1 ? 
$hdr{connection} =~ /\bkeep-?alive\b/i @@ -906,13 +950,17 @@ # we ignore any errors, as it is very common to receive # Content-Length != 0 but no actual body # we also access %hdr, as $_[1] might be an erro - http_request ( - $method => $hdr{location}, - %arg, - recurse => $recurse - 1, - Redirect => [$_[0], \%hdr], - $cb - ); + $state{recurse} = + http_request ( + $method => $hdr{location}, + %arg, + recurse => $recurse - 1, + Redirect => [$_[0], \%hdr], + sub { + %state = (); + &$cb + }, + ); } else { $cb->($_[0], \%hdr); } @@ -954,7 +1002,7 @@ $state{read_chunk} = sub { $_[1] =~ /^([0-9a-fA-F]+)/ - or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); + or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); my $len = hex $1; @@ -1034,17 +1082,23 @@ # if keepalive is enabled, then the server closing the connection # before a response can happen legally - we retry on idempotent methods. - if ($keptalive && $idempotent) { + if ($was_persistent && $idempotent) { my $old_eof = $hdl->{on_eof}; $hdl->{on_eof} = sub { _destroy_state %state; - http_request ( - $method => $url, - %arg, - keepalive => 0, - $cb - ); + %state = (); + $state{recurse} = + http_request ( + $method => $url, + %arg, + recurse => $recurse - 1, + keepalive => 0, + sub { + %state = (); + &$cb + } + ); }; $hdl->on_read (sub { return unless %state; @@ -1062,13 +1116,14 @@ my $prepare_handle = sub { my ($hdl) = $state{handle}; - $hdl->timeout ($timeout); $hdl->on_error (sub { _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] }; }); $hdl->on_eof (sub { _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }; }); + $hdl->timeout_reset; + $hdl->timeout ($timeout); }; # connected to proxy (or origin server) @@ -1119,10 +1174,15 @@ # try to use an existing keepalive connection, but only if we, ourselves, plan # on a keepalive request (in theory, this should be a separate config option). 
- if ($keepalive && $KA_CACHE{$ka_key}) { - $keptalive = 1; + if ($persistent && $KA_CACHE{$ka_key}) { + $was_persistent = 1; + $state{handle} = ka_fetch $ka_key; + $state{handle}->destroyed + and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d# $prepare_handle->(); + $state{handle}->destroyed + and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d# $handle_actual_request->(); } else { @@ -1174,6 +1234,10 @@ To clear an already-set proxy, use C<undef>. +When AnyEvent::HTTP is loaded for the first time it will query the +default proxy from the operating system, currently by looking at +C<$ENV{http_proxy}>. + =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] Remove all cookies from the cookie jar that have been expired. If @@ -1192,11 +1256,12 @@ emptied. All other keys are hostnames or IP addresses pointing to hash-references. The key for these inner hash references is the server path for which this cookie is meant, and the values are again -hash-references. The keys of those hash-references is the cookie name, and +hash-references. Each key of those hash-references is a cookie name, and the value, you guessed it, is another hash-reference, this time with the key-value pairs from the cookie, except for C<expires> and C<max-age>, which have been replaced by a C<_expires> key that contains the cookie -expiry timestamp. +expiry timestamp. Session cookies are indicated by not having an +C<_expires> key. Here is an example of a cookie jar with a single cookie, so you have a chance of understanding the above paragraph: @@ -1230,7 +1295,7 @@ =item $AnyEvent::HTTP::TIMEOUT -The default timeout for conenction operations (default: C<300>). +The default timeout for connection operations (default: C<300>). 
=item $AnyEvent::HTTP::USERAGENT @@ -1254,7 +1319,7 @@ =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT -The time after which idle persistent conenctions get closed by +The time after which idle persistent connections get closed by AnyEvent::HTTP (default: C<3>). =item $AnyEvent::HTTP::ACTIVE @@ -1305,7 +1370,7 @@ for (0..11) { if ($m eq $month[$_]) { require Time::Local; - return Time::Local::timegm ($S, $M, $H, $d, $_, $y); + return eval { Time::Local::timegm ($S, $M, $H, $d, $_, $y) }; } } @@ -1334,8 +1399,8 @@ =head2 HTTP/1.1 FILE DOWNLOAD -Downloading files with HTTP cna be quite tricky, especially when something -goes wrong and you want tor esume. +Downloading files with HTTP can be quite tricky, especially when something +goes wrong and you want to resume. Here is a function that initiates and resumes a download. It uses the last modified time to check for file content changes, and works with many @@ -1361,7 +1426,7 @@ warn -s _; if (stat $fh and -s _) { $ofs = -s _; - warn "-s is ", $ofs;#d# + warn "-s is ", $ofs; $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; $hdr{"range"} = "bytes=$ofs-"; }