1 |
root |
1.1 |
NAME |
2 |
root |
1.2 |
AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client |
3 |
root |
1.1 |
|
4 |
|
|
SYNOPSIS |
5 |
root |
1.2 |
use AnyEvent::HTTP; |
6 |
root |
1.1 |
|
7 |
root |
1.3 |
http_get "http://www.nethype.de/", sub { print $_[1] }; |
8 |
|
|
|
9 |
|
|
# ... do something else here |
10 |
|
|
|
11 |
root |
1.1 |
DESCRIPTION |
12 |
|
|
This module is an AnyEvent user, you need to make sure that you use and |
13 |
|
|
run a supported event loop. |
14 |
|
|
|
15 |
root |
1.2 |
This module implements a simple, stateless and non-blocking HTTP client. |
16 |
|
|
It supports GET, POST and other request methods, cookies and more, all |
17 |
root |
1.17 |
on a very low level. It can follow redirects, supports proxies, and |
18 |
root |
1.2 |
automatically limits the number of connections to the values specified |
19 |
|
|
in the RFC. |
20 |
|
|
|
21 |
|
|
It should generally be a "good client" that is enough for most HTTP |
22 |
|
|
tasks. Simple tasks should be simple, but complex tasks should still be |
23 |
|
|
possible as the user retains control over request and response headers. |
24 |
|
|
|
25 |
|
|
The caller is responsible for authentication management, cookies (if the |
26 |
|
|
simplistic implementation in this module doesn't suffice), referer and |
27 |
|
|
other high-level protocol details for which this module offers only |
28 |
|
|
limited support. |
29 |
|
|
|
30 |
|
|
METHODS |
31 |
|
|
http_get $url, key => value..., $cb->($data, $headers) |
32 |
|
|
Executes an HTTP-GET request. See the http_request function for |
33 |
root |
1.5 |
details on additional parameters and the return value. |
34 |
root |
1.2 |
|
35 |
|
|
http_head $url, key => value..., $cb->($data, $headers) |
36 |
|
|
Executes an HTTP-HEAD request. See the http_request function for |
37 |
root |
1.5 |
details on additional parameters and the return value. |
38 |
root |
1.2 |
|
39 |
|
|
http_post $url, $body, key => value..., $cb->($data, $headers) |
40 |
root |
1.4 |
Executes an HTTP-POST request with a request body of $body. See the |
41 |
root |
1.5 |
http_request function for details on additional parameters and the |
42 |
|
|
return value. |
43 |
root |
1.2 |
|
44 |
|
|
http_request $method => $url, key => value..., $cb->($data, $headers) |
45 |
|
|
Executes a HTTP request of type $method (e.g. "GET", "POST"). The |
46 |
|
|
URL must be an absolute http or https URL. |
47 |
|
|
|
48 |
root |
1.5 |
When called in void context, nothing is returned. In other contexts, |
49 |
|
|
"http_request" returns a "cancellation guard" - you have to keep the |
50 |
|
|
object alive at least until the callback gets called. If the object |
51 |
root |
1.14 |
gets destroyed before the callback is called, the request will be |
52 |
root |
1.5 |
cancelled. |
53 |
|
|
|
54 |
root |
1.8 |
The callback will be called with the response body data as first |
55 |
|
|
argument (or "undef" if an error occurred), and a hash-ref with |
56 |
root |
1.15 |
response headers (and trailers) as second argument. |
57 |
root |
1.2 |
|
58 |
|
|
All the headers in that hash are lowercased. In addition to the |
59 |
root |
1.13 |
response headers, the "pseudo-headers" (uppercase to avoid clashing |
60 |
|
|
with possible response headers) "HTTPVersion", "Status" and "Reason" |
61 |
root |
1.14 |
contain the three parts of the HTTP Status-Line of the same name. If |
62 |
|
|
an error occurs during the body phase of a request, then the |
63 |
|
|
original "Status" and "Reason" values from the header are available |
64 |
|
|
as "OrigStatus" and "OrigReason". |
65 |
root |
1.13 |
|
66 |
|
|
The pseudo-header "URL" contains the actual URL (which can differ |
67 |
|
|
from the requested URL when following redirects - for example, you |
68 |
|
|
might get an error that your URL scheme is not supported even though |
69 |
|
|
your URL is a valid http URL because it redirected to an ftp URL, in |
70 |
|
|
which case you can look at the URL pseudo header). |
71 |
|
|
|
72 |
|
|
The pseudo-header "Redirect" only exists when the request was a |
73 |
|
|
result of an internal redirect. In that case it is an array |
74 |
|
|
reference with the "($data, $headers)" from the redirect response. |
75 |
|
|
Note that this response could in turn be the result of a redirect |
76 |
|
|
itself, and "$headers->{Redirect}[1]{Redirect}" will then contain |
77 |
|
|
the original response, and so on. |
78 |
root |
1.3 |
|
79 |
root |
1.6 |
If the server sends a header multiple times, then their contents |
80 |
|
|
will be joined together with a comma (","), as per the HTTP spec. |
81 |
root |
1.2 |
|
82 |
|
|
If an internal error occurs, such as not being able to resolve a |
83 |
|
|
hostname, then $data will be "undef", "$headers->{Status}" will be |
84 |
root |
1.15 |
590-599 and the "Reason" pseudo-header will contain an error |
85 |
|
|
message. Currently the following status codes are used: |
86 |
|
|
|
87 |
|
|
595 - errors during connection establishment, proxy handshake. |
88 |
|
|
596 - errors during TLS negotiation, request sending and header |
89 |
|
|
processing. |
90 |
|
|
597 - errors during body receiving or processing. |
91 |
|
|
598 - user aborted request via "on_header" or "on_body". |
92 |
|
|
599 - other, usually nonretryable, errors (garbled URL etc.). |
93 |
root |
1.2 |
|
94 |
|
|
A typical callback might look like this: |
95 |
|
|
|
96 |
|
|
sub { |
97 |
|
|
my ($body, $hdr) = @_; |
98 |
|
|
|
99 |
|
|
if ($hdr->{Status} =~ /^2/) { |
100 |
|
|
... everything should be ok |
101 |
|
|
} else { |
102 |
|
|
print "error, $hdr->{Status} $hdr->{Reason}\n"; |
103 |
|
|
} |
104 |
|
|
} |
105 |
|
|
|
106 |
|
|
Additional parameters are key-value pairs, and are fully optional. |
107 |
|
|
They include: |
108 |
|
|
|
109 |
|
|
recurse => $count (default: $MAX_RECURSE) |
110 |
|
|
Whether to recurse requests or not, e.g. on redirects, |
111 |
|
|
authentication retries and so on, and how often to do so. |
112 |
|
|
|
113 |
|
|
headers => hashref |
114 |
|
|
The request headers to use. Currently, "http_request" may |
115 |
|
|
provide its own "Host:", "Content-Length:", "Connection:" and |
116 |
root |
1.15 |
"Cookie:" headers and will provide defaults at least for "TE:", |
117 |
|
|
"Referer:" and "User-Agent:" (this can be suppressed by using |
118 |
|
|
"undef" for these headers in which case they won't be sent at |
119 |
|
|
all). |
120 |
|
|
|
121 |
|
|
You really should provide your own "User-Agent:" header value |
122 |
|
|
that is appropriate for your program - I wouldn't be surprised |
123 |
|
|
if the default AnyEvent string gets blocked by webservers sooner |
124 |
|
|
or later. |
125 |
root |
1.2 |
|
126 |
|
|
timeout => $seconds |
127 |
|
|
The time-out to use for various stages - each connect attempt |
128 |
root |
1.11 |
will reset the timeout, as will read or write activity, i.e. |
129 |
|
|
this is not an overall timeout. |
130 |
|
|
|
131 |
|
|
Default timeout is 5 minutes. |
132 |
root |
1.2 |
|
133 |
|
|
proxy => [$host, $port[, $scheme]] or undef |
134 |
|
|
Use the given http proxy for all requests. If not specified, |
135 |
|
|
then the default proxy (as specified by $ENV{http_proxy}) is |
136 |
|
|
used. |
137 |
|
|
|
138 |
root |
1.15 |
$scheme must be either missing or must be "http" for HTTP. |
139 |
root |
1.2 |
|
140 |
|
|
body => $string |
141 |
root |
1.15 |
The request body, usually empty. Will be sent as-is (future |
142 |
root |
1.2 |
versions of this module might offer more options). |
143 |
|
|
|
144 |
|
|
cookie_jar => $hash_ref |
145 |
|
|
Passing this parameter enables (simplified) cookie-processing, |
146 |
|
|
loosely based on the original netscape specification. |
147 |
|
|
|
148 |
|
|
The $hash_ref must be an (initially empty) hash reference which |
149 |
|
|
will get updated automatically. It is possible to save the |
150 |
root |
1.15 |
cookie jar to persistent storage with something like JSON or |
151 |
|
|
Storable - see the "AnyEvent::HTTP::cookie_jar_expire" function |
152 |
|
|
if you wish to remove expired or session-only cookies, and also |
153 |
|
|
for documentation on the format of the cookie jar. |
154 |
|
|
|
155 |
|
|
Note that this cookie implementation is not meant to be |
156 |
|
|
complete. If you want complete cookie management you have to do |
157 |
|
|
that on your own. "cookie_jar" is meant as a quick fix to get |
158 |
|
|
most cookie-using sites working. Cookies are a privacy disaster, |
159 |
|
|
do not use them unless required to. |
160 |
|
|
|
161 |
|
|
When cookie processing is enabled, the "Cookie:" and |
162 |
|
|
"Set-Cookie:" headers will be set and handled by this module, |
163 |
|
|
otherwise they will be left untouched. |
164 |
root |
1.2 |
|
165 |
root |
1.8 |
tls_ctx => $scheme | $tls_ctx |
166 |
|
|
Specifies the AnyEvent::TLS context to be used for https |
167 |
|
|
connections. This parameter follows the same rules as the |
168 |
|
|
"tls_ctx" parameter to AnyEvent::Handle, but additionally, the |
169 |
|
|
two strings "low" or "high" can be specified, which give you a |
170 |
|
|
predefined low-security (no verification, highest compatibility) |
171 |
|
|
and high-security (CA and common-name verification) TLS context. |
172 |
|
|
|
173 |
|
|
The default for this option is "low", which could be interpreted |
174 |
|
|
as "give me the page, no matter what". |
175 |
|
|
|
176 |
root |
1.15 |
See also the "sessionid" parameter. |
177 |
|
|
|
178 |
|
|
sessionid => $string |
179 |
|
|
The module might reuse connections to the same host internally. |
180 |
|
|
Sometimes (e.g. when using TLS), you do not want to reuse |
181 |
|
|
connections from other sessions. This can be achieved by setting |
182 |
|
|
this parameter to some unique ID (such as the address of an |
183 |
|
|
object storing your state data, or the TLS context) - only |
184 |
|
|
connections using the same unique ID will be reused. |
185 |
|
|
|
186 |
root |
1.11 |
on_prepare => $callback->($fh) |
187 |
|
|
In rare cases you need to "tune" the socket before it is used to |
188 |
|
|
connect (for example, to bind it on a given IP address). This |
189 |
|
|
parameter overrides the prepare callback passed to |
190 |
|
|
"AnyEvent::Socket::tcp_connect" and behaves exactly the same way |
191 |
|
|
(e.g. it has to provide a timeout). See the description for the |
192 |
|
|
$prepare_cb argument of "AnyEvent::Socket::tcp_connect" for |
193 |
|
|
details. |
194 |
|
|
|
195 |
root |
1.14 |
tcp_connect => $callback->($host, $service, $connect_cb, |
196 |
|
|
$prepare_cb) |
197 |
|
|
In even rarer cases you want total control over how |
198 |
|
|
AnyEvent::HTTP establishes connections. Normally it uses |
199 |
|
|
AnyEvent::Socket::tcp_connect to do this, but you can provide |
200 |
|
|
your own "tcp_connect" function - obviously, it has to follow |
201 |
|
|
the same calling conventions, except that it may always return a |
202 |
|
|
connection guard object. |
203 |
|
|
|
204 |
|
|
There are probably lots of weird uses for this function, |
205 |
|
|
starting from tracing the hosts "http_request" actually tries to |
206 |
|
|
connect, to (inexact but fast) host => IP address caching or |
207 |
|
|
even socks protocol support. |
208 |
|
|
|
209 |
root |
1.8 |
on_header => $callback->($headers) |
210 |
|
|
When specified, this callback will be called with the header |
211 |
|
|
hash as soon as headers have been successfully received from the |
212 |
|
|
remote server (not on locally-generated errors). |
213 |
|
|
|
214 |
|
|
It has to return either true (in which case AnyEvent::HTTP will |
215 |
|
|
continue), or false, in which case AnyEvent::HTTP will cancel |
216 |
|
|
the download (and call the finish callback with an error code of |
217 |
|
|
598). |
218 |
|
|
|
219 |
|
|
This callback is useful, among other things, to quickly reject |
220 |
|
|
unwanted content, which, if it is supposed to be rare, can be |
221 |
|
|
faster than first doing a "HEAD" request. |
222 |
|
|
|
223 |
root |
1.15 |
The downside is that cancelling the request makes it impossible |
224 |
|
|
to re-use the connection. Also, the "on_header" callback will |
225 |
|
|
not receive any trailer (headers sent after the response body). |
226 |
|
|
|
227 |
root |
1.8 |
Example: cancel the request unless the content-type is |
228 |
|
|
"text/html". |
229 |
|
|
|
230 |
|
|
on_header => sub { |
231 |
|
|
$_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/ |
232 |
|
|
}, |
233 |
|
|
|
234 |
|
|
on_body => $callback->($partial_body, $headers) |
235 |
|
|
When specified, all body data will be passed to this callback |
236 |
|
|
instead of to the completion callback. The completion callback |
237 |
|
|
will get the empty string instead of the body data. |
238 |
|
|
|
239 |
|
|
It has to return either true (in which case AnyEvent::HTTP will |
240 |
|
|
continue), or false, in which case AnyEvent::HTTP will cancel |
241 |
|
|
the download (and call the completion callback with an error |
242 |
|
|
code of 598). |
243 |
|
|
|
244 |
root |
1.15 |
The downside to cancelling the request is that it makes it |
245 |
|
|
impossible to re-use the connection. |
246 |
|
|
|
247 |
root |
1.8 |
This callback is useful when the data is too large to be held in |
248 |
|
|
memory (so the callback writes it to a file) or when only some |
249 |
|
|
information should be extracted, or when the body should be |
250 |
|
|
processed incrementally. |
251 |
|
|
|
252 |
|
|
It is usually preferred over doing your own body handling via |
253 |
root |
1.9 |
"want_body_handle", but in case of streaming APIs, where HTTP is |
254 |
|
|
only used to create a connection, "want_body_handle" is the |
255 |
|
|
better alternative, as it allows you to install your own event |
256 |
|
|
handler, reducing resource usage. |
257 |
root |
1.8 |
|
258 |
|
|
want_body_handle => $enable |
259 |
|
|
When enabled (default is disabled), the behaviour of |
260 |
|
|
AnyEvent::HTTP changes considerably: after parsing the headers, |
261 |
|
|
and instead of downloading the body (if any), the completion |
262 |
|
|
callback will be called. Instead of the $body argument |
263 |
|
|
containing the body data, the callback will receive the |
264 |
|
|
AnyEvent::Handle object associated with the connection. In error |
265 |
|
|
cases, "undef" will be passed. When there is no body (e.g. |
266 |
|
|
status 304), the empty string will be passed. |
267 |
|
|
|
268 |
|
|
The handle object might or might not be in TLS mode, might be |
269 |
root |
1.15 |
connected to a proxy, be a persistent connection, use chunked |
270 |
|
|
transfer encoding etc., and configured in unspecified ways. The |
271 |
|
|
user is responsible for this handle (it will not be used by this |
272 |
|
|
module anymore). |
273 |
root |
1.8 |
|
274 |
|
|
This is useful with some push-type services, where, after the |
275 |
|
|
initial headers, an interactive protocol is used (typical |
276 |
|
|
example would be the push-style twitter API which starts a |
277 |
|
|
JSON/XML stream). |
278 |
|
|
|
279 |
|
|
If you think you need this, first have a look at "on_body", to |
280 |
root |
1.9 |
see if that doesn't solve your problem in a better way. |
281 |
root |
1.8 |
|
282 |
root |
1.15 |
persistent => $boolean |
283 |
|
|
Try to create/reuse a persistent connection. When this flag is |
284 |
|
|
set (default: true for idempotent requests, false for all |
285 |
|
|
others), then "http_request" tries to re-use an existing |
286 |
|
|
(previously-created) persistent connection to the host and, |
287 |
|
|
failing that, tries to create a new one. |
288 |
|
|
|
289 |
|
|
Requests failing in certain ways will be automatically retried |
290 |
|
|
once, which is dangerous for non-idempotent requests, which is |
291 |
|
|
why it defaults to off for them. The reason for this is because |
292 |
|
|
the bozos who designed HTTP/1.1 made it impossible to |
293 |
|
|
distinguish between a fatal error and a normal connection |
294 |
|
|
timeout, so you never know whether there was a problem with your |
295 |
|
|
request or not. |
296 |
|
|
|
297 |
|
|
When reusing an existent connection, many parameters (such as |
298 |
|
|
TLS context) will be ignored. See the "sessionid" parameter for a |
299 |
|
|
workaround. |
300 |
|
|
|
301 |
|
|
keepalive => $boolean |
302 |
|
|
Only used when "persistent" is also true. This parameter decides |
303 |
|
|
whether "http_request" tries to handshake a HTTP/1.0-style |
304 |
|
|
keep-alive connection (as opposed to only a HTTP/1.1 persistent |
305 |
|
|
connection). |
306 |
|
|
|
307 |
|
|
The default is true, except when using a proxy, in which case it |
308 |
|
|
defaults to false, as HTTP/1.0 proxies cannot support this in a |
309 |
|
|
meaningful way. |
310 |
|
|
|
311 |
|
|
handle_params => { key => value ... } |
312 |
|
|
The key-value pairs in this hash will be passed to any |
313 |
|
|
AnyEvent::Handle constructor that is called - not all requests |
314 |
|
|
will create a handle, and sometimes more than one is created, so |
315 |
|
|
this parameter is only good for setting hints. |
316 |
|
|
|
317 |
|
|
Example: set the maximum read size to 4096, to potentially |
318 |
|
|
conserve memory at the cost of speed. |
319 |
|
|
|
320 |
|
|
handle_params => { |
321 |
|
|
max_read_size => 4096, |
322 |
|
|
}, |
323 |
|
|
|
324 |
|
|
Example: do a simple HTTP GET request for http://www.nethype.de/ and |
325 |
|
|
print the response body. |
326 |
root |
1.2 |
|
327 |
|
|
http_request GET => "http://www.nethype.de/", sub { |
328 |
|
|
my ($body, $hdr) = @_; |
329 |
|
|
print "$body\n"; |
330 |
|
|
}; |
331 |
|
|
|
332 |
root |
1.15 |
Example: do a HTTP HEAD request on https://www.google.com/, use a |
333 |
root |
1.2 |
timeout of 30 seconds. |
334 |
|
|
|
335 |
|
|
http_request |
336 |
|
|
HEAD => "https://www.google.com", |
337 |
root |
1.15 |
headers => { "user-agent" => "MySearchClient 1.0" }, |
338 |
root |
1.2 |
timeout => 30, |
339 |
|
|
sub { |
340 |
|
|
my ($body, $hdr) = @_; |
341 |
|
|
use Data::Dumper; |
342 |
|
|
print Dumper $hdr; |
343 |
|
|
} |
344 |
|
|
; |
345 |
|
|
|
346 |
root |
1.15 |
Example: do another simple HTTP GET request, but immediately try to |
347 |
|
|
cancel it. |
348 |
root |
1.5 |
|
349 |
|
|
my $request = http_request GET => "http://www.nethype.de/", sub { |
350 |
|
|
my ($body, $hdr) = @_; |
351 |
|
|
print "$body\n"; |
352 |
|
|
}; |
353 |
|
|
|
354 |
|
|
undef $request; |
355 |
|
|
|
356 |
root |
1.13 |
DNS CACHING |
357 |
|
|
AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for the |
358 |
|
|
actual connection, which in turn uses AnyEvent::DNS to resolve |
359 |
|
|
hostnames. The latter is a simple stub resolver and does no caching on |
360 |
|
|
its own. If you want DNS caching, you currently have to provide your own |
361 |
|
|
default resolver (by storing a suitable resolver object in |
362 |
root |
1.15 |
$AnyEvent::DNS::RESOLVER) or your own "tcp_connect" callback. |
363 |
root |
1.13 |
|
364 |
root |
1.2 |
GLOBAL FUNCTIONS AND VARIABLES |
365 |
|
|
AnyEvent::HTTP::set_proxy "proxy-url" |
366 |
|
|
Sets the default proxy server to use. The proxy-url must begin with |
367 |
root |
1.15 |
a string of the form "http://host:port", croaks otherwise. |
368 |
root |
1.12 |
|
369 |
|
|
To clear an already-set proxy, use "undef". |
370 |
root |
1.2 |
|
371 |
root |
1.15 |
AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
372 |
|
|
Remove all cookies from the cookie jar that have been expired. If |
373 |
|
|
$session_end is given and true, then additionally remove all session |
374 |
|
|
cookies. |
375 |
|
|
|
376 |
|
|
You should call this function (with a true $session_end) before you |
377 |
|
|
save cookies to disk, and you should call this function after |
378 |
|
|
loading them again. If you have a long-running program you can |
379 |
|
|
additionally call this function from time to time. |
380 |
|
|
|
381 |
|
|
A cookie jar is initially an empty hash-reference that is managed by |
382 |
|
|
this module. Its format is subject to change, but currently it is |
383 |
|
|
like this: |
384 |
|
|
|
385 |
|
|
The key "version" has to contain 1, otherwise the hash gets emptied. |
386 |
|
|
All other keys are hostnames or IP addresses pointing to |
387 |
|
|
hash-references. The key for these inner hash references is the |
388 |
|
|
server path for which this cookie is meant, and the values are again |
389 |
|
|
hash-references. The keys of those hash-references is the cookie |
390 |
|
|
name, and the value, you guessed it, is another hash-reference, this |
391 |
|
|
time with the key-value pairs from the cookie, except for "expires" |
392 |
|
|
and "max-age", which have been replaced by a "_expires" key that |
393 |
|
|
contains the cookie expiry timestamp. |
394 |
|
|
|
395 |
|
|
Here is an example of a cookie jar with a single cookie, so you have |
396 |
|
|
a chance of understanding the above paragraph: |
397 |
|
|
|
398 |
|
|
{ |
399 |
|
|
version => 1, |
400 |
|
|
"10.0.0.1" => { |
401 |
|
|
"/" => { |
402 |
|
|
"mythweb_id" => { |
403 |
|
|
_expires => 1293917923, |
404 |
|
|
value => "ooRung9dThee3ooyXooM1Ohm", |
405 |
|
|
}, |
406 |
|
|
}, |
407 |
|
|
}, |
408 |
|
|
} |
409 |
|
|
|
410 |
root |
1.14 |
$date = AnyEvent::HTTP::format_date $timestamp |
411 |
|
|
Takes a POSIX timestamp (seconds since the epoch) and formats it as |
412 |
|
|
a HTTP Date (RFC 2616). |
413 |
|
|
|
414 |
|
|
$timestamp = AnyEvent::HTTP::parse_date $date |
415 |
root |
1.15 |
Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) |
416 |
|
|
or a bunch of minor variations of those, and returns the |
417 |
|
|
corresponding POSIX timestamp, or "undef" if the date cannot be |
418 |
|
|
parsed. |
419 |
root |
1.14 |
|
420 |
root |
1.2 |
$AnyEvent::HTTP::MAX_RECURSE |
421 |
|
|
The default value for the "recurse" request parameter (default: 10). |
422 |
|
|
|
423 |
root |
1.15 |
$AnyEvent::HTTP::TIMEOUT |
424 |
|
|
The default timeout for connection operations (default: 300). |
425 |
|
|
|
426 |
root |
1.2 |
$AnyEvent::HTTP::USERAGENT |
427 |
|
|
The default value for the "User-Agent" header (the default is |
428 |
root |
1.8 |
"Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; |
429 |
root |
1.2 |
+http://software.schmorp.de/pkg/AnyEvent)"). |
430 |
|
|
|
431 |
root |
1.8 |
$AnyEvent::HTTP::MAX_PER_HOST |
432 |
root |
1.10 |
The maximum number of concurrent connections to the same host |
433 |
root |
1.8 |
(identified by the hostname). If the limit is exceeded, then the |
434 |
|
|
additional requests are queued until previous connections are |
435 |
root |
1.15 |
closed. Both persistent and non-persistent connections are counted |
436 |
|
|
in this limit. |
437 |
root |
1.2 |
|
438 |
root |
1.8 |
The default value for this is 4, and it is highly advisable to not |
439 |
root |
1.15 |
increase it much. |
440 |
|
|
|
441 |
|
|
For comparison: the RFCs recommend 4 non-persistent or 2 persistent |
442 |
|
|
connections, older browsers used 2, newer ones (such as firefox 3) |
443 |
|
|
typically use 6, and Opera uses 8 because like, they have the |
444 |
|
|
fastest browser and give a shit for everybody else on the planet. |
445 |
|
|
|
446 |
|
|
$AnyEvent::HTTP::PERSISTENT_TIMEOUT |
447 |
|
|
The time after which idle persistent connections get closed by |
448 |
|
|
AnyEvent::HTTP (default: 3). |
449 |
root |
1.2 |
|
450 |
|
|
$AnyEvent::HTTP::ACTIVE |
451 |
|
|
The number of active connections. This is not the number of |
452 |
|
|
currently running requests, but the number of currently open and |
453 |
root |
1.15 |
non-idle TCP connections. This number can be useful for |
454 |
root |
1.2 |
load-leveling. |
455 |
root |
1.1 |
|
456 |
root |
1.16 |
SHOWCASE |
457 |
|
|
This section contains some more elaborate "real-world" examples or code |
458 |
|
|
snippets. |
459 |
|
|
|
460 |
|
|
HTTP/1.1 FILE DOWNLOAD |
461 |
|
|
Downloading files with HTTP can be quite tricky, especially when |
462 |
|
|
something goes wrong and you want to resume. |
463 |
|
|
|
464 |
|
|
Here is a function that initiates and resumes a download. It uses the |
465 |
|
|
last modified time to check for file content changes, and works with |
466 |
|
|
many HTTP/1.0 servers as well, and usually falls back to a complete |
467 |
|
|
re-download on older servers. |
468 |
|
|
|
469 |
|
|
It calls the completion callback with either "undef", which means a |
470 |
|
|
nonretryable error occurred, 0 when the download was partial and should |
471 |
|
|
be retried, and 1 if it was successful. |
472 |
|
|
|
473 |
|
|
use AnyEvent::HTTP; |
474 |
|
|
|
475 |
|
|
sub download($$$) { |
476 |
|
|
my ($url, $file, $cb) = @_; |
477 |
|
|
|
478 |
|
|
open my $fh, "+<", $file |
479 |
|
|
or die "$file: $!"; |
480 |
|
|
|
481 |
|
|
my %hdr; |
482 |
|
|
my $ofs = 0; |
483 |
|
|
|
484 |
|
|
warn stat $fh; |
485 |
|
|
warn -s _; |
486 |
|
|
if (stat $fh and -s _) { |
487 |
|
|
$ofs = -s _; |
488 |
|
|
warn "-s is ", $ofs;#d# |
489 |
|
|
$hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; |
490 |
|
|
$hdr{"range"} = "bytes=$ofs-"; |
491 |
|
|
} |
492 |
|
|
|
493 |
|
|
http_get $url, |
494 |
|
|
headers => \%hdr, |
495 |
|
|
on_header => sub { |
496 |
|
|
my ($hdr) = @_; |
497 |
|
|
|
498 |
|
|
if ($hdr->{Status} == 200 && $ofs) { |
499 |
|
|
# resume failed |
500 |
|
|
truncate $fh, $ofs = 0; |
501 |
|
|
} |
502 |
|
|
|
503 |
|
|
sysseek $fh, $ofs, 0; |
504 |
|
|
|
505 |
|
|
1 |
506 |
|
|
}, |
507 |
|
|
on_body => sub { |
508 |
|
|
my ($data, $hdr) = @_; |
509 |
|
|
|
510 |
|
|
if ($hdr->{Status} =~ /^2/) { |
511 |
|
|
length $data == syswrite $fh, $data |
512 |
|
|
or return; # abort on write errors |
513 |
|
|
} |
514 |
|
|
|
515 |
|
|
1 |
516 |
|
|
}, |
517 |
|
|
sub { |
518 |
|
|
my (undef, $hdr) = @_; |
519 |
|
|
|
520 |
|
|
my $status = $hdr->{Status}; |
521 |
|
|
|
522 |
|
|
if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { |
523 |
|
|
utime $time, $time, $fh; |
524 |
|
|
} |
525 |
|
|
|
526 |
|
|
if ($status == 200 || $status == 206 || $status == 416) { |
527 |
|
|
# download ok || resume ok || file already fully downloaded |
528 |
|
|
$cb->(1, $hdr); |
529 |
|
|
|
530 |
|
|
} elsif ($status == 412) { |
531 |
|
|
# file has changed while resuming, delete and retry |
532 |
|
|
unlink $file; |
533 |
|
|
$cb->(0, $hdr); |
534 |
|
|
|
535 |
|
|
} elsif ($status == 500 or $status == 503 or $status =~ /^59/) { |
536 |
|
|
# retry later |
537 |
|
|
$cb->(0, $hdr); |
538 |
|
|
|
539 |
|
|
} else { |
540 |
|
|
$cb->(undef, $hdr); |
541 |
|
|
} |
542 |
|
|
} |
543 |
|
|
; |
544 |
|
|
} |
545 |
|
|
|
546 |
|
|
download "http://server/somelargefile", "/tmp/somelargefile", sub { |
547 |
|
|
if ($_[0]) { |
548 |
|
|
print "OK!\n"; |
549 |
|
|
} elsif (defined $_[0]) { |
550 |
|
|
print "please retry later\n"; |
551 |
|
|
} else { |
552 |
|
|
print "ERROR\n"; |
553 |
|
|
} |
554 |
|
|
}; |
555 |
|
|
|
556 |
|
|
SOCKS PROXIES |
557 |
root |
1.14 |
Socks proxies are not directly supported by AnyEvent::HTTP. You can |
558 |
|
|
compile your perl to support socks, or use an external program such as |
559 |
|
|
socksify (dante) or tsocks to make your program use a socks proxy |
560 |
|
|
transparently. |
561 |
|
|
|
562 |
|
|
Alternatively, for AnyEvent::HTTP only, you can use your own |
563 |
|
|
"tcp_connect" function that does the proxy handshake - here is an |
564 |
|
|
example that works with socks4a proxies: |
565 |
|
|
|
566 |
|
|
use Errno; |
567 |
|
|
use AnyEvent::Util; |
568 |
|
|
use AnyEvent::Socket; |
569 |
|
|
use AnyEvent::Handle; |
570 |
|
|
|
571 |
|
|
# host, port and username of/for your socks4a proxy |
572 |
|
|
my $socks_host = "10.0.0.23"; |
573 |
|
|
my $socks_port = 9050; |
574 |
|
|
my $socks_user = ""; |
575 |
|
|
|
576 |
|
|
sub socks4a_connect { |
577 |
|
|
my ($host, $port, $connect_cb, $prepare_cb) = @_; |
578 |
|
|
|
579 |
|
|
my $hdl = new AnyEvent::Handle |
580 |
|
|
connect => [$socks_host, $socks_port], |
581 |
|
|
on_prepare => sub { $prepare_cb->($_[0]{fh}) }, |
582 |
|
|
on_error => sub { $connect_cb->() }, |
583 |
|
|
; |
584 |
|
|
|
585 |
|
|
$hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host); |
586 |
|
|
|
587 |
|
|
$hdl->push_read (chunk => 8, sub { |
588 |
|
|
my ($hdl, $chunk) = @_; |
589 |
|
|
my ($status, $port, $ipn) = unpack "xCna4", $chunk; |
590 |
|
|
|
591 |
|
|
if ($status == 0x5a) { |
592 |
|
|
$connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port"); |
593 |
|
|
} else { |
594 |
|
|
$! = Errno::ENXIO; $connect_cb->(); |
595 |
|
|
} |
596 |
|
|
}); |
597 |
|
|
|
598 |
|
|
$hdl |
599 |
|
|
} |
600 |
|
|
|
601 |
|
|
Use "socks4a_connect" instead of "tcp_connect" when doing |
602 |
|
|
"http_request"s, possibly after switching off other proxy types: |
603 |
|
|
|
604 |
|
|
AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies |
605 |
|
|
|
606 |
|
|
http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub { |
607 |
|
|
my ($data, $headers) = @_; |
608 |
|
|
... |
609 |
|
|
}; |
610 |
|
|
|
611 |
root |
1.1 |
SEE ALSO |
612 |
root |
1.2 |
AnyEvent. |
613 |
root |
1.1 |
|
614 |
|
|
AUTHOR |
615 |
root |
1.3 |
Marc Lehmann <schmorp@schmorp.de> |
616 |
|
|
http://home.schmorp.de/ |
617 |
root |
1.1 |
|
618 |
root |
1.7 |
With many thanks to Дмитрий Шалашов, who provided |
619 |
|
|
countless testcases and bugreports. |
620 |
|
|
|