… | |
… | |
12 | This module is an AnyEvent user, you need to make sure that you use and |
12 | This module is an AnyEvent user, you need to make sure that you use and |
13 | run a supported event loop. |
13 | run a supported event loop. |
14 | |
14 | |
15 | This module implements a simple, stateless and non-blocking HTTP client. |
15 | This module implements a simple, stateless and non-blocking HTTP client. |
16 | It supports GET, POST and other request methods, cookies and more, all |
16 | It supports GET, POST and other request methods, cookies and more, all |
17 | on a very low level. It can follow redirects supports proxies and |
17 | on a very low level. It can follow redirects, supports proxies, and |
18 | automatically limits the number of connections to the values specified |
18 | automatically limits the number of connections to the values specified |
19 | in the RFC. |
19 | in the RFC. |
20 | |
20 | |
21 | It should generally be a "good client" that is enough for most HTTP |
21 | It should generally be a "good client" that is enough for most HTTP |
22 | tasks. Simple tasks should be simple, but complex tasks should still be |
22 | tasks. Simple tasks should be simple, but complex tasks should still be |
… | |
… | |
106 | Additional parameters are key-value pairs, and are fully optional. |
106 | Additional parameters are key-value pairs, and are fully optional. |
107 | They include: |
107 | They include: |
108 | |
108 | |
109 | recurse => $count (default: $MAX_RECURSE) |
109 | recurse => $count (default: $MAX_RECURSE) |
110 | Whether to recurse requests or not, e.g. on redirects, |
110 | Whether to recurse requests or not, e.g. on redirects, |
111 | authentication retries and so on, and how often to do so. |
111 | authentication and other retries and so on, and how often to do |
|
|
112 | so. |
112 | |
113 | |
113 | headers => hashref |
114 | headers => hashref |
114 | The request headers to use. Currently, "http_request" may |
115 | The request headers to use. Currently, "http_request" may |
115 | provide its own "Host:", "Content-Length:", "Connection:" and |
116 | provide its own "Host:", "Content-Length:", "Connection:" and |
116 | "Cookie:" headers and will provide defaults at least for "TE:", |
117 | "Cookie:" headers and will provide defaults at least for "TE:", |
… | |
… | |
121 | You really should provide your own "User-Agent:" header value |
122 | You really should provide your own "User-Agent:" header value |
122 | that is appropriate for your program - I wouldn't be surprised |
123 | that is appropriate for your program - I wouldn't be surprised |
123 | if the default AnyEvent string gets blocked by webservers sooner |
124 | if the default AnyEvent string gets blocked by webservers sooner |
124 | or later. |
125 | or later. |
125 | |
126 | |
|
|
127 | Also, make sure that your header names and values do not |
|
|
128 | contain any embedded newlines. |
|
|
129 | |
126 | timeout => $seconds |
130 | timeout => $seconds |
127 | The time-out to use for various stages - each connect attempt |
131 | The time-out to use for various stages - each connect attempt |
128 | will reset the timeout, as will read or write activity, i.e. |
132 | will reset the timeout, as will read or write activity, i.e. |
129 | this is not an overall timeout. |
133 | this is not an overall timeout. |
130 | |
134 | |
131 | Default timeout is 5 minutes. |
135 | Default timeout is 5 minutes. |
132 | |
136 | |
133 | proxy => [$host, $port[, $scheme]] or undef |
137 | proxy => [$host, $port[, $scheme]] or undef |
134 | Use the given http proxy for all requests. If not specified, |
138 | Use the given http proxy for all requests, or no proxy if |
135 | then the default proxy (as specified by $ENV{http_proxy}) is |
|
|
136 | used. |
139 | "undef" is used. |
137 | |
140 | |
138 | $scheme must be either missing or must be "http" for HTTP. |
141 | $scheme must be either missing or must be "http" for HTTP. |
|
|
142 | |
|
|
143 | If not specified, then the default proxy is used (see |
|
|
144 | "AnyEvent::HTTP::set_proxy"). |
139 | |
145 | |
140 | body => $string |
146 | body => $string |
141 | The request body, usually empty. Will be sent as-is (future |
147 | The request body, usually empty. Will be sent as-is (future |
142 | versions of this module might offer more options). |
148 | versions of this module might offer more options). |
143 | |
149 | |
… | |
… | |
331 | |
337 | |
332 | Example: do a HTTP HEAD request on https://www.google.com/, use a |
338 | Example: do a HTTP HEAD request on https://www.google.com/, use a |
333 | timeout of 30 seconds. |
339 | timeout of 30 seconds. |
334 | |
340 | |
335 | http_request |
341 | http_request |
336 | GET => "https://www.google.com", |
342 | HEAD => "https://www.google.com", |
337 | headers => { "user-agent" => "MySearchClient 1.0" }, |
343 | headers => { "user-agent" => "MySearchClient 1.0" }, |
338 | timeout => 30, |
344 | timeout => 30, |
339 | sub { |
345 | sub { |
340 | my ($body, $hdr) = @_; |
346 | my ($body, $hdr) = @_; |
341 | use Data::Dumper; |
347 | use Data::Dumper; |
… | |
… | |
365 | AnyEvent::HTTP::set_proxy "proxy-url" |
371 | AnyEvent::HTTP::set_proxy "proxy-url" |
366 | Sets the default proxy server to use. The proxy-url must begin with |
372 | Sets the default proxy server to use. The proxy-url must begin with |
367 | a string of the form "http://host:port", croaks otherwise. |
373 | a string of the form "http://host:port", croaks otherwise. |
368 | |
374 | |
369 | To clear an already-set proxy, use "undef". |
375 | To clear an already-set proxy, use "undef". |
|
|
376 | |
|
|
377 | When AnyEvent::HTTP is loaded for the first time it will query the |
|
|
378 | default proxy from the operating system, currently by looking at |
|
|
379 | "$ENV{http_proxy}". |
370 | |
380 | |
371 | AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
381 | AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
372 | Remove all cookies from the cookie jar that have been expired. If |
382 | Remove all cookies from the cookie jar that have been expired. If |
373 | $session_end is given and true, then additionally remove all session |
383 | $session_end is given and true, then additionally remove all session |
374 | cookies. |
384 | cookies. |
… | |
… | |
419 | |
429 | |
420 | $AnyEvent::HTTP::MAX_RECURSE |
430 | $AnyEvent::HTTP::MAX_RECURSE |
421 | The default value for the "recurse" request parameter (default: 10). |
431 | The default value for the "recurse" request parameter (default: 10). |
422 | |
432 | |
423 | $AnyEvent::HTTP::TIMEOUT |
433 | $AnyEvent::HTTP::TIMEOUT |
424 | The default timeout for conenction operations (default: 300). |
434 | The default timeout for connection operations (default: 300). |
425 | |
435 | |
426 | $AnyEvent::HTTP::USERAGENT |
436 | $AnyEvent::HTTP::USERAGENT |
427 | The default value for the "User-Agent" header (the default is |
437 | The default value for the "User-Agent" header (the default is |
428 | "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; |
438 | "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; |
429 | +http://software.schmorp.de/pkg/AnyEvent)"). |
439 | +http://software.schmorp.de/pkg/AnyEvent)"). |
… | |
… | |
451 | The number of active connections. This is not the number of |
461 | The number of active connections. This is not the number of |
452 | currently running requests, but the number of currently open and |
462 | currently running requests, but the number of currently open and |
453 | non-idle TCP connections. This number can be useful for |
463 | non-idle TCP connections. This number can be useful for |
454 | load-leveling. |
464 | load-leveling. |
455 | |
465 | |
|
|
466 | SHOWCASE |
|
|
467 | This section contains some more elaborate "real-world" examples or code |
|
|
468 | snippets. |
|
|
469 | |
|
|
470 | HTTP/1.1 FILE DOWNLOAD |
|
|
471 | Downloading files with HTTP can be quite tricky, especially when |
|
|
472 | something goes wrong and you want to resume. |
|
|
473 | |
|
|
474 | Here is a function that initiates and resumes a download. It uses the |
|
|
475 | last modified time to check for file content changes, and works with |
|
|
476 | many HTTP/1.0 servers as well, and usually falls back to a complete |
|
|
477 | re-download on older servers. |
|
|
478 | |
|
|
479 | It calls the completion callback with either "undef", which means a |
|
|
480 | nonretryable error occurred, 0 when the download was partial and should |
|
|
481 | be retried, and 1 if it was successful. |
|
|
482 | |
|
|
483 | use AnyEvent::HTTP; |
|
|
484 | |
|
|
485 | sub download($$$) { |
|
|
486 | my ($url, $file, $cb) = @_; |
|
|
487 | |
|
|
488 | open my $fh, "+<", $file |
|
|
489 | or die "$file: $!"; |
|
|
490 | |
|
|
491 | my %hdr; |
|
|
492 | my $ofs = 0; |
|
|
493 | |
|
|
494 | warn stat $fh; |
|
|
495 | warn -s _; |
|
|
496 | if (stat $fh and -s _) { |
|
|
497 | $ofs = -s _; |
|
|
498 | warn "-s is ", $ofs; |
|
|
499 | $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; |
|
|
500 | $hdr{"range"} = "bytes=$ofs-"; |
|
|
501 | } |
|
|
502 | |
|
|
503 | http_get $url, |
|
|
504 | headers => \%hdr, |
|
|
505 | on_header => sub { |
|
|
506 | my ($hdr) = @_; |
|
|
507 | |
|
|
508 | if ($hdr->{Status} == 200 && $ofs) { |
|
|
509 | # resume failed |
|
|
510 | truncate $fh, $ofs = 0; |
|
|
511 | } |
|
|
512 | |
|
|
513 | sysseek $fh, $ofs, 0; |
|
|
514 | |
|
|
515 | 1 |
|
|
516 | }, |
|
|
517 | on_body => sub { |
|
|
518 | my ($data, $hdr) = @_; |
|
|
519 | |
|
|
520 | if ($hdr->{Status} =~ /^2/) { |
|
|
521 | length $data == syswrite $fh, $data |
|
|
522 | or return; # abort on write errors |
|
|
523 | } |
|
|
524 | |
|
|
525 | 1 |
|
|
526 | }, |
|
|
527 | sub { |
|
|
528 | my (undef, $hdr) = @_; |
|
|
529 | |
|
|
530 | my $status = $hdr->{Status}; |
|
|
531 | |
|
|
532 | if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { |
|
|
533 | utime $time, $time, $fh; |
|
|
534 | } |
|
|
535 | |
|
|
536 | if ($status == 200 || $status == 206 || $status == 416) { |
|
|
537 | # download ok || resume ok || file already fully downloaded |
|
|
538 | $cb->(1, $hdr); |
|
|
539 | |
|
|
540 | } elsif ($status == 412) { |
|
|
541 | # file has changed while resuming, delete and retry |
|
|
542 | unlink $file; |
|
|
543 | $cb->(0, $hdr); |
|
|
544 | |
|
|
545 | } elsif ($status == 500 or $status == 503 or $status =~ /^59/) { |
|
|
546 | # retry later |
|
|
547 | $cb->(0, $hdr); |
|
|
548 | |
|
|
549 | } else { |
|
|
550 | $cb->(undef, $hdr); |
|
|
551 | } |
|
|
552 | } |
|
|
553 | ; |
|
|
554 | } |
|
|
555 | |
|
|
556 | download "http://server/somelargefile", "/tmp/somelargefile", sub { |
|
|
557 | if ($_[0]) { |
|
|
558 | print "OK!\n"; |
|
|
559 | } elsif (defined $_[0]) { |
|
|
560 | print "please retry later\n"; |
|
|
561 | } else { |
|
|
562 | print "ERROR\n"; |
|
|
563 | } |
|
|
564 | }; |
|
|
565 | |
456 | SOCKS PROXIES |
566 | SOCKS PROXIES |
457 | Socks proxies are not directly supported by AnyEvent::HTTP. You can |
567 | Socks proxies are not directly supported by AnyEvent::HTTP. You can |
458 | compile your perl to support socks, or use an external program such as |
568 | compile your perl to support socks, or use an external program such as |
459 | socksify (dante) or tsocks to make your program use a socks proxy |
569 | socksify (dante) or tsocks to make your program use a socks proxy |
460 | transparently. |
570 | transparently. |
461 | |
571 | |