… | |
… | |
50 | object at least alive until the callback gets called. If the object |
50 | object at least alive until the callback gets called. If the object |
51 | gets destroyed before the callback is called, the request will be |
51 | gets destroyed before the callback is called, the request will be |
52 | cancelled. |
52 | cancelled. |
53 | |
53 | |
54 | The callback will be called with the response body data as first |
54 | The callback will be called with the response body data as first |
55 | argument (or "undef" if an error occured), and a hash-ref with |
55 | argument (or "undef" if an error occurred), and a hash-ref with |
56 | response headers (and trailers) as second argument. |
56 | response headers (and trailers) as second argument. |
57 | |
57 | |
58 | All the headers in that hash are lowercased. In addition to the |
58 | All the headers in that hash are lowercased. In addition to the |
59 | response headers, the "pseudo-headers" (uppercase to avoid clashing |
59 | response headers, the "pseudo-headers" (uppercase to avoid clashing |
60 | with possible response headers) "HTTPVersion", "Status" and "Reason" |
60 | with possible response headers) "HTTPVersion", "Status" and "Reason" |
… | |
… | |
82 | If an internal error occurs, such as not being able to resolve a |
82 | If an internal error occurs, such as not being able to resolve a |
83 | hostname, then $data will be "undef", "$headers->{Status}" will be |
83 | hostname, then $data will be "undef", "$headers->{Status}" will be |
84 | 590-599 and the "Reason" pseudo-header will contain an error |
84 | 590-599 and the "Reason" pseudo-header will contain an error |
85 | message. Currently the following status codes are used: |
85 | message. Currently the following status codes are used: |
86 | |
86 | |
87 | 595 - errors during connection etsbalishment, proxy handshake. |
87 | 595 - errors during connection establishment, proxy handshake. |
88 | 596 - errors during TLS negotiation, request sending and header |
88 | 596 - errors during TLS negotiation, request sending and header |
89 | processing. |
89 | processing. |
90 | 597 - errors during body receiving or processing. |
90 | 597 - errors during body receiving or processing. |
91 | 598 - user aborted request via "on_header" or "on_body". |
91 | 598 - user aborted request via "on_header" or "on_body". |
92 | 599 - other, usually nonretryable, errors (garbled URL etc.). |
92 | 599 - other, usually nonretryable, errors (garbled URL etc.). |
… | |
… | |
106 | Additional parameters are key-value pairs, and are fully optional. |
106 | Additional parameters are key-value pairs, and are fully optional. |
107 | They include: |
107 | They include: |
108 | |
108 | |
109 | recurse => $count (default: $MAX_RECURSE) |
109 | recurse => $count (default: $MAX_RECURSE) |
110 | Whether to recurse requests or not, e.g. on redirects, |
110 | Whether to recurse requests or not, e.g. on redirects, |
111 | authentication retries and so on, and how often to do so. |
111 | authentication and other retries and so on, and how often to do |
|
|
112 | so. |
|
|
113 | |
|
|
114 | Only redirects to http and https URLs are supported. While most |
|
|
115 | common redirection forms are handled entirely within this |
|
|
116 | module, some require the use of the optional URI module. If it |
|
|
117 | is required but missing, then the request will fail with an |
|
|
118 | error. |
112 | |
119 | |
113 | headers => hashref |
120 | headers => hashref |
114 | The request headers to use. Currently, "http_request" may |
121 | The request headers to use. Currently, "http_request" may |
115 | provide its own "Host:", "Content-Length:", "Connection:" and |
122 | provide its own "Host:", "Content-Length:", "Connection:" and |
116 | "Cookie:" headers and will provide defaults at least for "TE:", |
123 | "Cookie:" headers and will provide defaults at least for "TE:", |
… | |
… | |
121 | You really should provide your own "User-Agent:" header value |
128 | You really should provide your own "User-Agent:" header value |
122 | that is appropriate for your program - I wouldn't be surprised |
129 | that is appropriate for your program - I wouldn't be surprised |
123 | if the default AnyEvent string gets blocked by webservers sooner |
130 | if the default AnyEvent string gets blocked by webservers sooner |
124 | or later. |
131 | or later. |
125 | |
132 | |
|
|
133 | Also, make sure that your headers names and values do not |
|
|
134 | contain any embedded newlines. |
|
|
135 | |
126 | timeout => $seconds |
136 | timeout => $seconds |
127 | The time-out to use for various stages - each connect attempt |
137 | The time-out to use for various stages - each connect attempt |
128 | will reset the timeout, as will read or write activity, i.e. |
138 | will reset the timeout, as will read or write activity, i.e. |
129 | this is not an overall timeout. |
139 | this is not an overall timeout. |
130 | |
140 | |
… | |
… | |
136 | |
146 | |
137 | $scheme must be either missing or must be "http" for HTTP. |
147 | $scheme must be either missing or must be "http" for HTTP. |
138 | |
148 | |
139 | If not specified, then the default proxy is used (see |
149 | If not specified, then the default proxy is used (see |
140 | "AnyEvent::HTTP::set_proxy"). |
150 | "AnyEvent::HTTP::set_proxy"). |
|
|
151 | |
|
|
152 | Currently, if your proxy requires authorization, you have to |
|
|
153 | specify an appropriate "Proxy-Authorization" header in every |
|
|
154 | request. |
|
|
155 | |
|
|
156 | Note that this module will prefer an existing persistent |
|
|
157 | connection, even if that connection was made using another |
|
|
158 | proxy. If you need to ensure that a new connection is made in |
|
|
159 | this case, you can either force "persistent" to false or e.g. |
|
|
160 | use the proxy address in your "sessionid". |
141 | |
161 | |
142 | body => $string |
162 | body => $string |
143 | The request body, usually empty. Will be sent as-is (future |
163 | The request body, usually empty. Will be sent as-is (future |
144 | versions of this module might offer more options). |
164 | versions of this module might offer more options). |
145 | |
165 | |
… | |
… | |
175 | The default for this option is "low", which could be interpreted |
195 | The default for this option is "low", which could be interpreted |
176 | as "give me the page, no matter what". |
196 | as "give me the page, no matter what". |
177 | |
197 | |
178 | See also the "sessionid" parameter. |
198 | See also the "sessionid" parameter. |
179 | |
199 | |
180 | session => $string |
200 | sessionid => $string |
181 | The module might reuse connections to the same host internally. |
201 | The module might reuse connections to the same host internally |
182 | Sometimes (e.g. when using TLS), you do not want to reuse |
202 | (regardless of other settings, such as "tcp_connect" or |
183 | connections from other sessions. This can be achieved by setting |
203 | "proxy"). Sometimes (e.g. when using TLS or a specific proxy), |
184 | this parameter to some unique ID (such as the address of an |
204 | you do not want to reuse connections from other sessions. This |
185 | object storing your state data, or the TLS context) - only |
205 | can be achieved by setting this parameter to some unique ID |
186 | connections using the same unique ID will be reused. |
206 | (such as the address of an object storing your state data or the |
|
|
207 | TLS context, or the proxy IP) - only connections using the same |
|
|
208 | unique ID will be reused. |
187 | |
209 | |
188 | on_prepare => $callback->($fh) |
210 | on_prepare => $callback->($fh) |
189 | In rare cases you need to "tune" the socket before it is used to |
211 | In rare cases you need to "tune" the socket before it is used to |
190 | connect (for exmaple, to bind it on a given IP address). This |
212 | connect (for example, to bind it on a given IP address). This |
191 | parameter overrides the prepare callback passed to |
213 | parameter overrides the prepare callback passed to |
192 | "AnyEvent::Socket::tcp_connect" and behaves exactly the same way |
214 | "AnyEvent::Socket::tcp_connect" and behaves exactly the same way |
193 | (e.g. it has to provide a timeout). See the description for the |
215 | (e.g. it has to provide a timeout). See the description for the |
194 | $prepare_cb argument of "AnyEvent::Socket::tcp_connect" for |
216 | $prepare_cb argument of "AnyEvent::Socket::tcp_connect" for |
195 | details. |
217 | details. |
… | |
… | |
200 | AnyEvent::HTTP establishes connections. Normally it uses |
222 | AnyEvent::HTTP establishes connections. Normally it uses |
201 | AnyEvent::Socket::tcp_connect to do this, but you can provide |
223 | AnyEvent::Socket::tcp_connect to do this, but you can provide |
202 | your own "tcp_connect" function - obviously, it has to follow |
224 | your own "tcp_connect" function - obviously, it has to follow |
203 | the same calling conventions, except that it may always return a |
225 | the same calling conventions, except that it may always return a |
204 | connection guard object. |
226 | connection guard object. |
|
|
227 | |
|
|
228 | The connections made by this hook will be treated as equivalent |
|
|
229 | to connections made the built-in way, specifically, they will be |
|
|
230 | put into and taken from the persistent connection cache. If your |
|
|
231 | $tcp_connect function is incompatible with this kind of re-use, |
|
|
232 | consider switching off "persistent" connections and/or providing |
|
|
233 | a "sessionid" identifier. |
205 | |
234 | |
206 | There are probably lots of weird uses for this function, |
235 | There are probably lots of weird uses for this function, |
207 | starting from tracing the hosts "http_request" actually tries to |
236 | starting from tracing the hosts "http_request" actually tries to |
208 | connect, to (inexact but fast) host => IP address caching or |
237 | connect, to (inexact but fast) host => IP address caching or |
209 | even socks protocol support. |
238 | even socks protocol support. |
… | |
… | |
283 | |
312 | |
284 | persistent => $boolean |
313 | persistent => $boolean |
285 | Try to create/reuse a persistent connection. When this flag is |
314 | Try to create/reuse a persistent connection. When this flag is |
286 | set (default: true for idempotent requests, false for all |
315 | set (default: true for idempotent requests, false for all |
287 | others), then "http_request" tries to re-use an existing |
316 | others), then "http_request" tries to re-use an existing |
288 | (previously-created) persistent connection to the host and, |
317 | (previously-created) persistent connection to the same host (i.e. |
|
|
318 | identical URL scheme, hostname, port and sessionid) and, failing |
289 | failing that, tries to create a new one. |
319 | that, tries to create a new one. |
290 | |
320 | |
291 | Requests failing in certain ways will be automatically retried |
321 | Requests failing in certain ways will be automatically retried |
292 | once, which is dangerous for non-idempotent requests, which is |
322 | once, which is dangerous for non-idempotent requests, which is |
293 | why it defaults to off for them. The reason for this is because |
323 | why it defaults to off for them. The reason for this is because |
294 | the bozos who designed HTTP/1.1 made it impossible to |
324 | the bozos who designed HTTP/1.1 made it impossible to |
295 | distinguish between a fatal error and a normal connection |
325 | distinguish between a fatal error and a normal connection |
296 | timeout, so you never know whether there was a problem with your |
326 | timeout, so you never know whether there was a problem with your |
297 | request or not. |
327 | request or not. |
298 | |
328 | |
299 | When reusing an existent connection, many parameters (such as |
329 | When reusing an existent connection, many parameters (such as |
300 | TLS context) will be ignored. See the "session" parameter for a |
330 | TLS context) will be ignored. See the "sessionid" parameter for |
301 | workaround. |
331 | a workaround. |
302 | |
332 | |
303 | keepalive => $boolean |
333 | keepalive => $boolean |
304 | Only used when "persistent" is also true. This parameter decides |
334 | Only used when "persistent" is also true. This parameter decides |
305 | whether "http_request" tries to handshake a HTTP/1.0-style |
335 | whether "http_request" tries to handshake a HTTP/1.0-style |
306 | keep-alive connection (as opposed to only a HTTP/1.1 persistent |
336 | keep-alive connection (as opposed to only a HTTP/1.1 persistent |
… | |
… | |
333 | |
363 | |
334 | Example: do a HTTP HEAD request on https://www.google.com/, use a |
364 | Example: do a HTTP HEAD request on https://www.google.com/, use a |
335 | timeout of 30 seconds. |
365 | timeout of 30 seconds. |
336 | |
366 | |
337 | http_request |
367 | http_request |
338 | GET => "https://www.google.com", |
368 | HEAD => "https://www.google.com", |
339 | headers => { "user-agent" => "MySearchClient 1.0" }, |
369 | headers => { "user-agent" => "MySearchClient 1.0" }, |
340 | timeout => 30, |
370 | timeout => 30, |
341 | sub { |
371 | sub { |
342 | my ($body, $hdr) = @_; |
372 | my ($body, $hdr) = @_; |
343 | use Data::Dumper; |
373 | use Data::Dumper; |
… | |
… | |
368 | Sets the default proxy server to use. The proxy-url must begin with |
398 | Sets the default proxy server to use. The proxy-url must begin with |
369 | a string of the form "http://host:port", croaks otherwise. |
399 | a string of the form "http://host:port", croaks otherwise. |
370 | |
400 | |
371 | To clear an already-set proxy, use "undef". |
401 | To clear an already-set proxy, use "undef". |
372 | |
402 | |
373 | When AnyEvent::HTTP is laoded for the first time it will query the |
403 | When AnyEvent::HTTP is loaded for the first time it will query the |
374 | default proxy from the operating system, currently by looking at |
404 | default proxy from the operating system, currently by looking at |
375 | "$ENV{http_proxy}". |
405 | "$ENV{http_proxy}". |
376 | |
406 | |
377 | AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
407 | AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
378 | Remove all cookies from the cookie jar that have been expired. If |
408 | Remove all cookies from the cookie jar that have been expired. If |
… | |
… | |
380 | cookies. |
410 | cookies. |
381 | |
411 | |
382 | You should call this function (with a true $session_end) before you |
412 | You should call this function (with a true $session_end) before you |
383 | save cookies to disk, and you should call this function after |
413 | save cookies to disk, and you should call this function after |
384 | loading them again. If you have a long-running program you can |
414 | loading them again. If you have a long-running program you can |
385 | additonally call this function from time to time. |
415 | additionally call this function from time to time. |
386 | |
416 | |
387 | A cookie jar is initially an empty hash-reference that is managed by |
417 | A cookie jar is initially an empty hash-reference that is managed by |
388 | this module. It's format is subject to change, but currently it is |
418 | this module. Its format is subject to change, but currently it is as |
389 | like this: |
419 | follows: |
390 | |
420 | |
391 | The key "version" has to contain 1, otherwise the hash gets emptied. |
421 | The key "version" has to contain 2, otherwise the hash gets cleared. |
392 | All other keys are hostnames or IP addresses pointing to |
422 | All other keys are hostnames or IP addresses pointing to |
393 | hash-references. The key for these inner hash references is the |
423 | hash-references. The key for these inner hash references is the |
394 | server path for which this cookie is meant, and the values are again |
424 | server path for which this cookie is meant, and the values are again |
395 | hash-references. The keys of those hash-references is the cookie |
425 | hash-references. Each key of those hash-references is a cookie name, |
396 | name, and the value, you guessed it, is another hash-reference, this |
426 | and the value, you guessed it, is another hash-reference, this time |
397 | time with the key-value pairs from the cookie, except for "expires" |
427 | with the key-value pairs from the cookie, except for "expires" and |
398 | and "max-age", which have been replaced by a "_expires" key that |
428 | "max-age", which have been replaced by a "_expires" key that |
399 | contains the cookie expiry timestamp. |
429 | contains the cookie expiry timestamp. Session cookies are indicated |
|
|
430 | by not having an "_expires" key. |
400 | |
431 | |
401 | Here is an example of a cookie jar with a single cookie, so you have |
432 | Here is an example of a cookie jar with a single cookie, so you have |
402 | a chance of understanding the above paragraph: |
433 | a chance of understanding the above paragraph: |
403 | |
434 | |
404 | { |
435 | { |
405 | version => 1, |
436 | version => 2, |
406 | "10.0.0.1" => { |
437 | "10.0.0.1" => { |
407 | "/" => { |
438 | "/" => { |
408 | "mythweb_id" => { |
439 | "mythweb_id" => { |
409 | _expires => 1293917923, |
440 | _expires => 1293917923, |
410 | value => "ooRung9dThee3ooyXooM1Ohm", |
441 | value => "ooRung9dThee3ooyXooM1Ohm", |
… | |
… | |
425 | |
456 | |
426 | $AnyEvent::HTTP::MAX_RECURSE |
457 | $AnyEvent::HTTP::MAX_RECURSE |
427 | The default value for the "recurse" request parameter (default: 10). |
458 | The default value for the "recurse" request parameter (default: 10). |
428 | |
459 | |
429 | $AnyEvent::HTTP::TIMEOUT |
460 | $AnyEvent::HTTP::TIMEOUT |
430 | The default timeout for conenction operations (default: 300). |
461 | The default timeout for connection operations (default: 300). |
431 | |
462 | |
432 | $AnyEvent::HTTP::USERAGENT |
463 | $AnyEvent::HTTP::USERAGENT |
433 | The default value for the "User-Agent" header (the default is |
464 | The default value for the "User-Agent" header (the default is |
434 | "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; |
465 | "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; |
435 | +http://software.schmorp.de/pkg/AnyEvent)"). |
466 | +http://software.schmorp.de/pkg/AnyEvent)"). |
436 | |
467 | |
437 | $AnyEvent::HTTP::MAX_PER_HOST |
468 | $AnyEvent::HTTP::MAX_PER_HOST |
438 | The maximum number of concurrent connections to the same host |
469 | The maximum number of concurrent connections to the same host |
439 | (identified by the hostname). If the limit is exceeded, then the |
470 | (identified by the hostname). If the limit is exceeded, then |
440 | additional requests are queued until previous connections are |
471 | additional requests are queued until previous connections are |
441 | closed. Both persistent and non-persistent connections are counted |
472 | closed. Both persistent and non-persistent connections are counted |
442 | in this limit. |
473 | in this limit. |
443 | |
474 | |
444 | The default value for this is 4, and it is highly advisable to not |
475 | The default value for this is 4, and it is highly advisable to not |
445 | increase it much. |
476 | increase it much. |
446 | |
477 | |
447 | For comparison: the RFCs recommend 4 non-persistent or 2 persistent |
478 | For comparison: the RFCs recommend 4 non-persistent or 2 persistent |
448 | connections, older browsers used 2, newers (such as firefox 3) |
479 | connections, older browsers used 2, newer ones (such as firefox 3) |
449 | typically use 6, and Opera uses 8 because like, they have the |
480 | typically use 6, and Opera uses 8 because like, they have the |
450 | fastest browser and give a shit for everybody else on the planet. |
481 | fastest browser and give a shit for everybody else on the planet. |
451 | |
482 | |
452 | $AnyEvent::HTTP::PERSISTENT_TIMEOUT |
483 | $AnyEvent::HTTP::PERSISTENT_TIMEOUT |
453 | The time after which idle persistent conenctions get closed by |
484 | The time after which idle persistent connections get closed by |
454 | AnyEvent::HTTP (default: 3). |
485 | AnyEvent::HTTP (default: 3). |
455 | |
486 | |
456 | $AnyEvent::HTTP::ACTIVE |
487 | $AnyEvent::HTTP::ACTIVE |
457 | The number of active connections. This is not the number of |
488 | The number of active connections. This is not the number of |
458 | currently running requests, but the number of currently open and |
489 | currently running requests, but the number of currently open and |
459 | non-idle TCP connections. This number can be useful for |
490 | non-idle TCP connections. This number can be useful for |
460 | load-leveling. |
491 | load-leveling. |
461 | |
492 | |
462 | SHOWCASE |
493 | SHOWCASE |
463 | This section contaisn some more elaborate "real-world" examples or code |
494 | This section contains some more elaborate "real-world" examples or code |
464 | snippets. |
495 | snippets. |
465 | |
496 | |
466 | HTTP/1.1 FILE DOWNLOAD |
497 | HTTP/1.1 FILE DOWNLOAD |
467 | Downloading files with HTTP can be quite tricky, especially when |
498 | Downloading files with HTTP can be quite tricky, especially when |
468 | something goes wrong and you want to resume. |
499 | something goes wrong and you want to resume. |
… | |
… | |
471 | last modified time to check for file content changes, and works with |
502 | last modified time to check for file content changes, and works with |
472 | many HTTP/1.0 servers as well, and usually falls back to a complete |
503 | many HTTP/1.0 servers as well, and usually falls back to a complete |
473 | re-download on older servers. |
504 | re-download on older servers. |
474 | |
505 | |
475 | It calls the completion callback with either "undef", which means a |
506 | It calls the completion callback with either "undef", which means a |
476 | nonretryable error occured, 0 when the download was partial and should |
507 | nonretryable error occurred, 0 when the download was partial and should |
477 | be retried, and 1 if it was successful. |
508 | be retried, and 1 if it was successful. |
478 | |
509 | |
479 | use AnyEvent::HTTP; |
510 | use AnyEvent::HTTP; |
480 | |
511 | |
481 | sub download($$$) { |
512 | sub download($$$) { |
… | |
… | |
485 | or die "$file: $!"; |
516 | or die "$file: $!"; |
486 | |
517 | |
487 | my %hdr; |
518 | my %hdr; |
488 | my $ofs = 0; |
519 | my $ofs = 0; |
489 | |
520 | |
490 | warn stat $fh; |
|
|
491 | warn -s _; |
|
|
492 | if (stat $fh and -s _) { |
521 | if (stat $fh and -s _) { |
493 | $ofs = -s _; |
522 | $ofs = -s _; |
494 | warn "-s is ", $ofs;#d# |
523 | warn "-s is ", $ofs; |
495 | $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; |
524 | $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; |
496 | $hdr{"range"} = "bytes=$ofs-"; |
525 | $hdr{"range"} = "bytes=$ofs-"; |
497 | } |
526 | } |
498 | |
527 | |
499 | http_get $url, |
528 | http_get $url, |
… | |
… | |
524 | my (undef, $hdr) = @_; |
553 | my (undef, $hdr) = @_; |
525 | |
554 | |
526 | my $status = $hdr->{Status}; |
555 | my $status = $hdr->{Status}; |
527 | |
556 | |
528 | if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { |
557 | if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { |
529 | utime $fh, $time, $time; |
558 | utime $time, $time, $fh; |
530 | } |
559 | } |
531 | |
560 | |
532 | if ($status == 200 || $status == 206 || $status == 416) { |
561 | if ($status == 200 || $status == 206 || $status == 416) { |
533 | # download ok || resume ok || file already fully downloaded |
562 | # download ok || resume ok || file already fully downloaded |
534 | $cb->(1, $hdr); |
563 | $cb->(1, $hdr); |
… | |
… | |
619 | |
648 | |
620 | AUTHOR |
649 | AUTHOR |
621 | Marc Lehmann <schmorp@schmorp.de> |
650 | Marc Lehmann <schmorp@schmorp.de> |
622 | http://home.schmorp.de/ |
651 | http://home.schmorp.de/ |
623 | |
652 | |
624 | With many thanks to Дмитрий Шалашов, who provided |
653 | With many thanks to Дмитрий Шалашов, who provided countless testcases |
625 | countless testcases and bugreports. |
654 | and bugreports. |
626 | |
655 | |