--- AnyEvent-HTTP/README 2011/01/04 08:29:28 1.15 +++ AnyEvent-HTTP/README 2016/08/28 09:31:29 1.27 @@ -14,7 +14,7 @@ This module implements a simple, stateless and non-blocking HTTP client. It supports GET, POST and other request methods, cookies and more, all - on a very low level. It can follow redirects supports proxies and + on a very low level. It can follow redirects, supports proxies, and automatically limits the number of connections to the values specified in the RFC. @@ -52,7 +52,7 @@ cancelled. The callback will be called with the response body data as first - argument (or "undef" if an error occured), and a hash-ref with + argument (or "undef" if an error occurred), and a hash-ref with response headers (and trailers) as second argument. All the headers in that hash are lowercased. In addition to the @@ -84,7 +84,7 @@ 590-599 and the "Reason" pseudo-header will contain an error message. Currently the following status codes are used: - 595 - errors during connection etsbalishment, proxy handshake. + 595 - errors during connection establishment, proxy handshake. 596 - errors during TLS negotiation, request sending and header processing. 597 - errors during body receiving or processing. @@ -108,7 +108,14 @@ recurse => $count (default: $MAX_RECURSE) Whether to recurse requests or not, e.g. on redirects, - authentication retries and so on, and how often to do so. + authentication and other retries and so on, and how often to do + so. + + Only redirects to http and https URLs are supported. While most + common redirection forms are handled entirely within this + module, some require the use of the optional URI module. If it + is required but missing, then the request will fail with an + error. headers => hashref The request headers to use. Currently, "http_request" may @@ -123,6 +130,9 @@ if the default AnyEvent string gets blocked by webservers sooner or later. + Also, make sure that your header names and values do not + contain any embedded newlines. 
+ timeout => $seconds The time-out to use for various stages - each connect attempt will reset the timeout, as will read or write activity, i.e. @@ -131,12 +141,18 @@ Default timeout is 5 minutes. proxy => [$host, $port[, $scheme]] or undef - Use the given http proxy for all requests. If not specified, - then the default proxy (as specified by $ENV{http_proxy}) is - used. + Use the given http proxy for all requests, or no proxy if + "undef" is used. $scheme must be either missing or must be "http" for HTTP. + If not specified, then the default proxy is used (see + "AnyEvent::HTTP::set_proxy"). + + Currently, if your proxy requires authorization, you have to + specify an appropriate "Proxy-Authorization" header in every + request. + body => $string The request body, usually empty. Will be sent as-is (future versions of this module might offer more options). @@ -185,7 +201,7 @@ on_prepare => $callback->($fh) In rare cases you need to "tune" the socket before it is used to - connect (for exmaple, to bind it on a given IP address). This + connect (for example, to bind it on a given IP address). This parameter overrides the prepare callback passed to "AnyEvent::Socket::tcp_connect" and behaves exactly the same way (e.g. it has to provide a timeout). See the description for the @@ -333,7 +349,7 @@ timeout of 30 seconds. http_request - GET => "https://www.google.com", + HEAD => "https://www.google.com", headers => { "user-agent" => "MySearchClient 1.0" }, timeout => 30, sub { @@ -368,6 +384,10 @@ To clear an already-set proxy, use "undef". + When AnyEvent::HTTP is loaded for the first time it will query the + default proxy from the operating system, currently by looking at + "$ENV{http_proxy}". + AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] Remove all cookies from the cookie jar that have been expired. 
If $session_end is given and true, then additionally remove all session @@ -376,21 +396,22 @@ You should call this function (with a true $session_end) before you save cookies to disk, and you should call this function after loading them again. If you have a long-running program you can - additonally call this function from time to time. + additionally call this function from time to time. A cookie jar is initially an empty hash-reference that is managed by - this module. It's format is subject to change, but currently it is - like this: + this module. Its format is subject to change, but currently it is as + follows: The key "version" has to contain 1, otherwise the hash gets emptied. All other keys are hostnames or IP addresses pointing to hash-references. The key for these inner hash references is the server path for which this cookie is meant, and the values are again - hash-references. The keys of those hash-references is the cookie - name, and the value, you guessed it, is another hash-reference, this - time with the key-value pairs from the cookie, except for "expires" - and "max-age", which have been replaced by a "_expires" key that - contains the cookie expiry timestamp. + hash-references. Each key of those hash-references is a cookie name, + and the value, you guessed it, is another hash-reference, this time + with the key-value pairs from the cookie, except for "expires" and + "max-age", which have been replaced by an "_expires" key that + contains the cookie expiry timestamp. Session cookies are indicated + by not having an "_expires" key. Here is an example of a cookie jar with a single cookie, so you have a chance of understanding the above paragraph: @@ -421,7 +442,7 @@ The default value for the "recurse" request parameter (default: 10). $AnyEvent::HTTP::TIMEOUT - The default timeout for conenction operations (default: 300). + The default timeout for connection operations (default: 300). 
$AnyEvent::HTTP::USERAGENT The default value for the "User-Agent" header (the default is @@ -430,7 +451,7 @@ $AnyEvent::HTTP::MAX_PER_HOST The maximum number of concurrent connections to the same host - (identified by the hostname). If the limit is exceeded, then the + (identified by the hostname). If the limit is exceeded, then additional requests are queued until previous connections are closed. Both persistent and non-persistent connections are counted in this limit. @@ -439,12 +460,12 @@ increase it much. For comparison: the RFC's recommend 4 non-persistent or 2 persistent - connections, older browsers used 2, newers (such as firefox 3) + connections, older browsers used 2, newer ones (such as firefox 3) typically use 6, and Opera uses 8 because like, they have the fastest browser and give a shit for everybody else on the planet. $AnyEvent::HTTP::PERSISTENT_TIMEOUT - The time after which idle persistent conenctions get closed by + The time after which idle persistent connections get closed by AnyEvent::HTTP (default: 3). $AnyEvent::HTTP::ACTIVE @@ -453,7 +474,105 @@ non-idle TCP connections. This number can be useful for load-leveling. - SOCKS PROXIES + SHOWCASE + This section contains some more elaborate "real-world" examples or code + snippets. + + HTTP/1.1 FILE DOWNLOAD + Downloading files with HTTP can be quite tricky, especially when + something goes wrong and you want to resume. + + Here is a function that initiates and resumes a download. It uses the + last modified time to check for file content changes, and works with + many HTTP/1.0 servers as well, and usually falls back to a complete + re-download on older servers. + + It calls the completion callback with one of three values: "undef" if a + nonretryable error occurred, 0 if the download was partial and should + be retried, and 1 if it was successful. 
+ + use AnyEvent::HTTP; + + sub download($$$) { + my ($url, $file, $cb) = @_; + + open my $fh, "+<", $file + or die "$file: $!"; + + my %hdr; + my $ofs = 0; + + if (stat $fh and -s _) { + $ofs = -s _; + warn "-s is ", $ofs; + $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; + $hdr{"range"} = "bytes=$ofs-"; + } + + http_get $url, + headers => \%hdr, + on_header => sub { + my ($hdr) = @_; + + if ($hdr->{Status} == 200 && $ofs) { + # resume failed + truncate $fh, $ofs = 0; + } + + sysseek $fh, $ofs, 0; + + 1 + }, + on_body => sub { + my ($data, $hdr) = @_; + + if ($hdr->{Status} =~ /^2/) { + length $data == syswrite $fh, $data + or return; # abort on write errors + } + + 1 + }, + sub { + my (undef, $hdr) = @_; + + my $status = $hdr->{Status}; + + if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { + utime $time, $time, $fh; + } + + if ($status == 200 || $status == 206 || $status == 416) { + # download ok || resume ok || file already fully downloaded + $cb->(1, $hdr); + + } elsif ($status == 412) { + # file has changed while resuming, delete and retry + unlink $file; + $cb->(0, $hdr); + + } elsif ($status == 500 or $status == 503 or $status =~ /^59/) { + # retry later + $cb->(0, $hdr); + + } else { + $cb->(undef, $hdr); + } + } + ; + } + + download "http://server/somelargefile", "/tmp/somelargefile", sub { + if ($_[0]) { + print "OK!\n"; + } elsif (defined $_[0]) { + print "please retry later\n"; + } else { + print "ERROR\n"; + } + }; + + SOCKS PROXIES Socks proxies are not directly supported by AnyEvent::HTTP. You can compile your perl to support socks, or use an external program such as socksify (dante) or tsocks to make your program use a socks proxy @@ -515,6 +634,6 @@ Marc Lehmann http://home.schmorp.de/ - With many thanks to Дмитрий Шалашов, who provided - countless testcases and bugreports. + With many thanks to Дмитрий Шалашов, who provided countless testcases + and bugreports.