--- AnyEvent-HTTP/README 2011/01/04 08:29:28 1.15 +++ AnyEvent-HTTP/README 2020/04/27 12:14:12 1.28 @@ -14,7 +14,7 @@ This module implements a simple, stateless and non-blocking HTTP client. It supports GET, POST and other request methods, cookies and more, all - on a very low level. It can follow redirects supports proxies and + on a very low level. It can follow redirects, supports proxies, and automatically limits the number of connections to the values specified in the RFC. @@ -52,7 +52,7 @@ cancelled. The callback will be called with the response body data as first - argument (or "undef" if an error occured), and a hash-ref with + argument (or "undef" if an error occurred), and a hash-ref with response headers (and trailers) as second argument. All the headers in that hash are lowercased. In addition to the @@ -84,7 +84,7 @@ 590-599 and the "Reason" pseudo-header will contain an error message. Currently the following status codes are used: - 595 - errors during connection etsbalishment, proxy handshake. + 595 - errors during connection establishment, proxy handshake. 596 - errors during TLS negotiation, request sending and header processing. 597 - errors during body receiving or processing. @@ -108,7 +108,14 @@ recurse => $count (default: $MAX_RECURSE) Whether to recurse requests or not, e.g. on redirects, - authentication retries and so on, and how often to do so. + authentication and other retries and so on, and how often to do + so. + + Only redirects to http and https URLs are supported. While most + common redirection forms are handled entirely within this + module, some require the use of the optional URI module. If it + is required but missing, then the request will fail with an + error. headers => hashref The request headers to use. Currently, "http_request" may @@ -123,6 +130,9 @@ if the default AnyEvent string gets blocked by webservers sooner or later. + Also, make sure that your header names and values do not + contain any embedded newlines. 
+ timeout => $seconds The time-out to use for various stages - each connect attempt will reset the timeout, as will read or write activity, i.e. @@ -131,12 +141,24 @@ Default timeout is 5 minutes. proxy => [$host, $port[, $scheme]] or undef - Use the given http proxy for all requests. If not specified, - then the default proxy (as specified by $ENV{http_proxy}) is - used. + Use the given http proxy for all requests, or no proxy if + "undef" is used. $scheme must be either missing or must be "http" for HTTP. + If not specified, then the default proxy is used (see + "AnyEvent::HTTP::set_proxy"). + + Currently, if your proxy requires authorization, you have to + specify an appropriate "Proxy-Authorization" header in every + request. + + Note that this module will prefer an existing persistent + connection, even if that connection was made using another + proxy. If you need to ensure that a new connection is made in + this case, you can either force "persistent" to false or e.g. + use the proxy address in your "sessionid". + body => $string The request body, usually empty. Will be sent as-is (future versions of this module might offer more options). @@ -175,17 +197,19 @@ See also the "sessionid" parameter. - session => $string - The module might reuse connections to the same host internally. - Sometimes (e.g. when using TLS), you do not want to reuse - connections from other sessions. This can be achieved by setting - this parameter to some unique ID (such as the address of an - object storing your state data, or the TLS context) - only - connections using the same unique ID will be reused. + sessionid => $string + The module might reuse connections to the same host internally + (regardless of other settings, such as "tcp_connect" or + "proxy"). Sometimes (e.g. when using TLS or a specific proxy), + you do not want to reuse connections from other sessions. 
This + can be achieved by setting this parameter to some unique ID + (such as the address of an object storing your state data or the + TLS context, or the proxy IP) - only connections using the same + unique ID will be reused. on_prepare => $callback->($fh) In rare cases you need to "tune" the socket before it is used to - connect (for exmaple, to bind it on a given IP address). This + connect (for example, to bind it on a given IP address). This parameter overrides the prepare callback passed to "AnyEvent::Socket::tcp_connect" and behaves exactly the same way (e.g. it has to provide a timeout). See the description for the @@ -201,6 +225,13 @@ the same calling conventions, except that it may always return a connection guard object. + The connections made by this hook will be treated as equivalent + to connections made the built-in way, specifically, they will be + put into and taken from the persistent connection cache. If your + $tcp_connect function is incompatible with this kind of re-use, + consider switching off "persistent" connections and/or providing + a "sessionid" identifier. + There are probably lots of weird uses for this function, starting from tracing the hosts "http_request" actually tries to connect, to (inexact but fast) host => IP address caching or @@ -283,8 +314,9 @@ Try to create/reuse a persistent connection. When this flag is set (default: true for idempotent requests, false for all others), then "http_request" tries to re-use an existing - (previously-created) persistent connection to the host and, - failing that, tries to create a new one. + (previously-created) persistent connection to the same host (i.e. + identical URL scheme, hostname, port and sessionid) and, failing + that, tries to create a new one. Requests failing in certain ways will be automatically retried once, which is dangerous for non-idempotent requests, which is @@ -295,8 +327,8 @@ request or not. 
When reusing an existent connection, many parameters (such as - TLS context) will be ignored. See the "session" parameter for a - workaround. + TLS context) will be ignored. See the "sessionid" parameter for + a workaround. keepalive => $boolean Only used when "persistent" is also true. This parameter decides @@ -333,7 +365,7 @@ timeout of 30 seconds. http_request - GET => "https://www.google.com", + HEAD => "https://www.google.com", headers => { "user-agent" => "MySearchClient 1.0" }, timeout => 30, sub { @@ -368,6 +400,10 @@ To clear an already-set proxy, use "undef". + When AnyEvent::HTTP is loaded for the first time it will query the + default proxy from the operating system, currently by looking at + "$ENV{http_proxy}". + AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] Remove all cookies from the cookie jar that have been expired. If $session_end is given and true, then additionally remove all session @@ -376,27 +412,28 @@ You should call this function (with a true $session_end) before you save cookies to disk, and you should call this function after loading them again. If you have a long-running program you can - additonally call this function from time to time. + additionally call this function from time to time. A cookie jar is initially an empty hash-reference that is managed by - this module. It's format is subject to change, but currently it is - like this: + this module. Its format is subject to change, but currently it is as + follows: - The key "version" has to contain 1, otherwise the hash gets emptied. + The key "version" has to contain 2, otherwise the hash gets cleared. All other keys are hostnames or IP addresses pointing to hash-references. The key for these inner hash references is the server path for which this cookie is meant, and the values are again - hash-references. 
The keys of those hash-references is the cookie - name, and the value, you guessed it, is another hash-reference, this - time with the key-value pairs from the cookie, except for "expires" - and "max-age", which have been replaced by a "_expires" key that - contains the cookie expiry timestamp. + hash-references. Each key of those hash-references is a cookie name, + and the value, you guessed it, is another hash-reference, this time + with the key-value pairs from the cookie, except for "expires" and + "max-age", which have been replaced by a "_expires" key that + contains the cookie expiry timestamp. Session cookies are indicated + by not having an "_expires" key. Here is an example of a cookie jar with a single cookie, so you have a chance of understanding the above paragraph: { - version => 1, + version => 2, "10.0.0.1" => { "/" => { "mythweb_id" => { @@ -421,7 +458,7 @@ The default value for the "recurse" request parameter (default: 10). $AnyEvent::HTTP::TIMEOUT - The default timeout for conenction operations (default: 300). + The default timeout for connection operations (default: 300). $AnyEvent::HTTP::USERAGENT The default value for the "User-Agent" header (the default is @@ -430,7 +467,7 @@ $AnyEvent::HTTP::MAX_PER_HOST The maximum number of concurrent connections to the same host - (identified by the hostname). If the limit is exceeded, then the + (identified by the hostname). If the limit is exceeded, then additional requests are queued until previous connections are closed. Both persistent and non-persistent connections are counted in this limit. @@ -439,12 +476,12 @@ increase it much. For comparison: the RFC's recommend 4 non-persistent or 2 persistent - connections, older browsers used 2, newers (such as firefox 3) + connections, older browsers used 2, newer ones (such as firefox 3) typically use 6, and Opera uses 8 because like, they have the fastest browser and give a shit for everybody else on the planet. 
$AnyEvent::HTTP::PERSISTENT_TIMEOUT - The time after which idle persistent conenctions get closed by + The time after which idle persistent connections get closed by AnyEvent::HTTP (default: 3). $AnyEvent::HTTP::ACTIVE @@ -453,7 +490,105 @@ non-idle TCP connections. This number can be useful for load-leveling. - SOCKS PROXIES + SHOWCASE + This section contains some more elaborate "real-world" examples or code + snippets. + + HTTP/1.1 FILE DOWNLOAD + Downloading files with HTTP can be quite tricky, especially when + something goes wrong and you want to resume. + + Here is a function that initiates and resumes a download. It uses the + last modified time to check for file content changes, and works with + many HTTP/1.0 servers as well, and usually falls back to a complete + re-download on older servers. + + It calls the completion callback with either "undef", which means a + nonretryable error occurred, 0 when the download was partial and should + be retried, and 1 if it was successful. + + use AnyEvent::HTTP; + + sub download($$$) { + my ($url, $file, $cb) = @_; + + open my $fh, "+<", $file + or die "$file: $!"; + + my %hdr; + my $ofs = 0; + + if (stat $fh and -s _) { + $ofs = -s _; + warn "-s is ", $ofs; + $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; + $hdr{"range"} = "bytes=$ofs-"; + } + + http_get $url, + headers => \%hdr, + on_header => sub { + my ($hdr) = @_; + + if ($hdr->{Status} == 200 && $ofs) { + # resume failed + truncate $fh, $ofs = 0; + } + + sysseek $fh, $ofs, 0; + + 1 + }, + on_body => sub { + my ($data, $hdr) = @_; + + if ($hdr->{Status} =~ /^2/) { + length $data == syswrite $fh, $data + or return; # abort on write errors + } + + 1 + }, + sub { + my (undef, $hdr) = @_; + + my $status = $hdr->{Status}; + + if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { + utime $time, $time, $fh; + } + + if ($status == 200 || $status == 206 || $status == 416) { + # download ok || resume ok || file already fully 
downloaded + $cb->(1, $hdr); + + } elsif ($status == 412) { + # file has changed while resuming, delete and retry + unlink $file; + $cb->(0, $hdr); + + } elsif ($status == 500 or $status == 503 or $status =~ /^59/) { + # retry later + $cb->(0, $hdr); + + } else { + $cb->(undef, $hdr); + } + } + ; + } + + download "http://server/somelargefile", "/tmp/somelargefile", sub { + if ($_[0]) { + print "OK!\n"; + } elsif (defined $_[0]) { + print "please retry later\n"; + } else { + print "ERROR\n"; + } + }; + + SOCKS PROXIES Socks proxies are not directly supported by AnyEvent::HTTP. You can compile your perl to support socks, or use an external program such as socksify (dante) or tsocks to make your program use a socks proxy @@ -515,6 +650,6 @@ Marc Lehmann http://home.schmorp.de/ - With many thanks to Дмитрий Шалашов, who provided - countless testcases and bugreports. + With many thanks to Дмитрий Шалашов, who provided countless testcases + and bugreports.