--- AnyEvent-HTTP/HTTP.pm 2013/11/18 01:01:02 1.118 +++ AnyEvent-HTTP/HTTP.pm 2019/10/16 01:20:02 1.136 @@ -48,7 +48,7 @@ use base Exporter::; -our $VERSION = '2.15'; +our $VERSION = 2.24; our @EXPORT = qw(http_get http_post http_head http_request); @@ -159,6 +159,11 @@ Whether to recurse requests or not, e.g. on redirects, authentication and other retries and so on, and how often to do so. +Only redirects to http and https URLs are supported. While most common +redirection forms are handled entirely within this module, some require +the use of the optional L module. If it is required but missing, then +the request will fail with an error. + =item headers => hashref The request headers to use. Currently, C may provide its own @@ -192,6 +197,14 @@ If not specified, then the default proxy is used (see C). +Currently, if your proxy requires authorization, you have to specify an +appropriate "Proxy-Authorization" header in every request. + +Note that this module will prefer an existing persistent connection, +even if that connection was made using another proxy. If you need to +ensure that a new connection is made in this case, you can either force +C to false or e.g. use the proxy address in your C. + =item body => $string The request body, usually empty. Will be sent as-is (future versions of @@ -233,13 +246,15 @@ See also the C parameter. -=item session => $string +=item sessionid => $string -The module might reuse connections to the same host internally. Sometimes -(e.g. when using TLS), you do not want to reuse connections from other -sessions. This can be achieved by setting this parameter to some unique -ID (such as the address of an object storing your state data, or the TLS -context) - only connections using the same unique ID will be reused. +The module might reuse connections to the same host internally (regardless +of other settings, such as C or C). Sometimes (e.g. +when using TLS or a specfic proxy), you do not want to reuse connections +from other sessions. This can be achieved by setting this parameter to +some unique ID (such as the address of an object storing your state data +or the TLS context, or the proxy IP) - only connections using the same +unique ID will be reused. =item on_prepare => $callback->($fh) @@ -258,6 +273,12 @@ obviously, it has to follow the same calling conventions, except that it may always return a connection guard object. +The connections made by this hook will be treated as equivalent to +connections made the built-in way, specifically, they will be put into +and taken from the persistent connection cache. If your C<$tcp_connect> +function is incompatible with this kind of re-use, consider switching off +C connections and/or providing a C identifier. + There are probably lots of weird uses for this function, starting from tracing the hosts C actually tries to connect, to (inexact but fast) host => IP address caching or even socks protocol support. @@ -336,8 +357,8 @@ Try to create/reuse a persistent connection. When this flag is set (default: true for idempotent requests, false for all others), then C tries to re-use an existing (previously-created) -persistent connection to the host and, failing that, tries to create a new -one. +persistent connection to same host (i.e. identical URL scheme, hostname, +port and sessionid) and, failing that, tries to create a new one. Requests failing in certain ways will be automatically retried once, which is dangerous for non-idempotent requests, which is why it defaults to off @@ -347,7 +368,7 @@ your request or not. When reusing an existent connection, many parameters (such as TLS context) -will be ignored. See the C parameter for a workaround. +will be ignored. See the C parameter for a workaround. =item keepalive => $boolean @@ -448,7 +469,7 @@ sub cookie_jar_expire($;$) { my ($jar, $session_end) = @_; - %$jar = () if $jar->{version} != 1; + %$jar = () if $jar->{version} != 2; my $anow = AE::now; @@ -478,20 +499,19 @@ sub cookie_jar_extract($$$$) { my ($jar, $scheme, $host, $path) = @_; - %$jar = () if $jar->{version} != 1; + %$jar = () if $jar->{version} != 2; + + $host = AnyEvent::Util::idn_to_ascii $host + if $host =~ /[^\x00-\x7f]/; my @cookies; while (my ($chost, $paths) = each %$jar) { next unless ref $paths; - if ($chost =~ /^\./) { - next unless $chost eq substr $host, -length $chost; - } elsif ($chost =~ /\./) { - next unless $chost eq $host; - } else { - next; - } + # exact match or suffix including . match + $chost eq $host or ".$chost" eq substr $host, -1 - length $chost + or next; while (my ($cpath, $cookies) = each %$paths) { next unless $cpath eq substr $path, 0, length $cpath; @@ -523,6 +543,8 @@ sub cookie_jar_set_cookie($$$$) { my ($jar, $set_cookie, $host, $date) = @_; + %$jar = () if $jar->{version} != 2; + my $anow = int AE::now; my $snow; # server-now @@ -577,7 +599,7 @@ my $cpath = (delete $kv{path}) || "/"; if (exists $kv{domain}) { - $cdom = delete $kv{domain}; + $cdom = $kv{domain}; $cdom =~ s/^\.?/./; # make sure it starts with a "." @@ -586,12 +608,14 @@ # this is not rfc-like and not netscape-like. go figure. my $ndots = $cdom =~ y/.//; next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2); + + $cdom = substr $cdom, 1; # remove initial . } else { $cdom = $host; } # store it - $jar->{version} = 1; + $jar->{version} = 2; $jar->{lc $cdom}{$cpath}{$name} = \%kv; redo if /\G\s*,/gc; @@ -767,7 +791,7 @@ : $uscheme eq "https" ? 443 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); - $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x + $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); my $uhost = lc $1; @@ -843,7 +867,7 @@ "$method $rpath HTTP/1.1\015\012" . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) . "\015\012" - . (delete $arg{body}) + . $arg{body} ); # return if error occurred during push_write() @@ -883,19 +907,42 @@ } # redirect handling - # microsoft and other shitheads don't give a shit for following standards, - # try to support some common forms of broken Location headers. - if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) { - $hdr{location} =~ s/^\.\/+//; - - my $url = "$rscheme://$uhost:$uport"; - - unless ($hdr{location} =~ s/^\///) { - $url .= $upath; - $url =~ s/\/[^\/]*$//; + # relative uri handling forced by microsoft and other shitheads. + # we give our best and fall back to URI if available. + if (exists $hdr{location}) { + my $loc = $hdr{location}; + + if ($loc =~ m%^//%) { # // + $loc = "$uscheme:$loc"; + + } elsif ($loc eq "") { + $loc = $url; + + } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple" + $loc =~ s/^\.\/+//; + + if ($loc !~ m%^[.?#]%) { + my $prefix = "$uscheme://$uauthority"; + + unless ($loc =~ s/^\///) { + $prefix .= $upath; + $prefix =~ s/\/[^\/]*$//; + } + + $loc = "$prefix/$loc"; + + } elsif (eval { require URI }) { # uri + $loc = URI->new_abs ($loc, $url)->as_string; + + } else { + return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" }; + #$hdr{Status} = 599; + #$hdr{Reason} = "Unparsable Redirect (URI module missing)"; + #$recurse = 0; + } } - $hdr{location} = "$url/$hdr{location}"; + $hdr{location} = $loc; } my $redirect; @@ -907,12 +954,16 @@ # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1. # also, the UA should ask the user for 301 and 307 and POST, # industry standard seems to be to simply follow. - # we go with the industry standard. + # we go with the industry standard. 308 is defined + # by rfc7538 if ($status == 301 or $status == 302 or $status == 303) { - # HTTP/1.1 is unclear on how to mutate the method - $method = "GET" unless $method eq "HEAD"; $redirect = 1; - } elsif ($status == 307) { + # HTTP/1.1 is unclear on how to mutate the method + unless ($method eq "HEAD") { + $method = "GET"; + delete $arg{body}; + } + } elsif ($status == 307 or $status == 308) { $redirect = 1; } } @@ -998,7 +1049,7 @@ } elsif ($chunked) { my $cl = 0; my $body = ""; - my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; + my $on_body = (!$redirect && $arg{on_body}) || sub { $body .= shift; 1 }; $state{read_chunk} = sub { $_[1] =~ /^([0-9a-fA-F]+)/ @@ -1041,7 +1092,7 @@ $_[0]->push_read (line => $state{read_chunk}); - } elsif ($arg{on_body}) { + } elsif (!$redirect && $arg{on_body}) { if (defined $len) { $_[0]->on_read (sub { $len -= length $_[0]{rbuf}; @@ -1090,10 +1141,10 @@ %state = (); $state{recurse} = http_request ( - $method => $url, + $method => $url, %arg, - recurse => $recurse - 1, - keepalive => 0, + recurse => $recurse - 1, + persistent => 0, sub { %state = (); &$cb @@ -1149,8 +1200,12 @@ if ($proxy && $uscheme eq "https") { # oh dear, we have to wrap it into a connect request + my $auth = exists $hdr{"proxy-authorization"} + ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012" + : ""; + # maybe re-use $uauthority with patched port? - $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012"); + $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012"); $state{handle}->push_read (line => $qr_nlnl, sub { $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }; @@ -1163,6 +1218,8 @@ } }); } else { + delete $hdr{"proxy-authorization"} unless $proxy; + $handle_actual_request->(); } }; @@ -1178,11 +1235,11 @@ $was_persistent = 1; $state{handle} = ka_fetch $ka_key; - $state{handle}->destroyed - and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d# +# $state{handle}->destroyed +# and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d# $prepare_handle->(); - $state{handle}->destroyed - and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d# +# $state{handle}->destroyed +# and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d# $handle_actual_request->(); } else { @@ -1250,10 +1307,10 @@ function from time to time. A cookie jar is initially an empty hash-reference that is managed by this -module. It's format is subject to change, but currently it is like this: +module. Its format is subject to change, but currently it is as follows: -The key C has to contain C<1>, otherwise the hash gets -emptied. All other keys are hostnames or IP addresses pointing to +The key C has to contain C<2>, otherwise the hash gets +cleared. All other keys are hostnames or IP addresses pointing to hash-references. The key for these inner hash references is the server path for which this cookie is meant, and the values are again hash-references. Each key of those hash-references is a cookie name, and @@ -1267,7 +1324,7 @@ chance of understanding the above paragraph: { - version => 1, + version => 2, "10.0.0.1" => { "/" => { "mythweb_id" => { @@ -1305,7 +1362,7 @@ =item $AnyEvent::HTTP::MAX_PER_HOST The maximum number of concurrent connections to the same host (identified -by the hostname). If the limit is exceeded, then the additional requests +by the hostname). If the limit is exceeded, then additional requests are queued until previous connections are closed. Both persistent and non-persistent connections are counted in this limit. @@ -1422,8 +1479,6 @@ my %hdr; my $ofs = 0; - warn stat $fh; - warn -s _; if (stat $fh and -s _) { $ofs = -s _; warn "-s is ", $ofs; @@ -1461,7 +1516,7 @@ my $status = $hdr->{Status}; if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { - utime $fh, $time, $time; + utime $time, $time, $fh; } if ($status == 200 || $status == 206 || $status == 416) {