… | |
… | |
15 | This module is an L<AnyEvent> user, you need to make sure that you use and |
15 | This module is an L<AnyEvent> user, you need to make sure that you use and |
16 | run a supported event loop. |
16 | run a supported event loop. |
17 | |
17 | |
18 | This module implements a simple, stateless and non-blocking HTTP |
18 | This module implements a simple, stateless and non-blocking HTTP |
19 | client. It supports GET, POST and other request methods, cookies and more, |
19 | client. It supports GET, POST and other request methods, cookies and more, |
20 | all on a very low level. It can follow redirects supports proxies and |
20 | all on a very low level. It can follow redirects, supports proxies, and |
21 | automatically limits the number of connections to the values specified in |
21 | automatically limits the number of connections to the values specified in |
22 | the RFC. |
22 | the RFC. |
23 | |
23 | |
24 | It should generally be a "good client" that is enough for most HTTP |
24 | It should generally be a "good client" that is enough for most HTTP |
25 | tasks. Simple tasks should be simple, but complex tasks should still be |
25 | tasks. Simple tasks should be simple, but complex tasks should still be |
… | |
… | |
46 | use AnyEvent::Util (); |
46 | use AnyEvent::Util (); |
47 | use AnyEvent::Handle (); |
47 | use AnyEvent::Handle (); |
48 | |
48 | |
49 | use base Exporter::; |
49 | use base Exporter::; |
50 | |
50 | |
51 | our $VERSION = '2.0'; |
51 | our $VERSION = '2.04'; |
52 | |
52 | |
53 | our @EXPORT = qw(http_get http_post http_head http_request); |
53 | our @EXPORT = qw(http_get http_post http_head http_request); |
54 | |
54 | |
55 | our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; |
55 | our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; |
56 | our $MAX_RECURSE = 10; |
56 | our $MAX_RECURSE = 10; |
… | |
… | |
179 | |
179 | |
180 | Default timeout is 5 minutes. |
180 | Default timeout is 5 minutes. |
181 | |
181 | |
182 | =item proxy => [$host, $port[, $scheme]] or undef |
182 | =item proxy => [$host, $port[, $scheme]] or undef |
183 | |
183 | |
184 | Use the given http proxy for all requests. If not specified, then the |
184 | Use the given http proxy for all requests, or no proxy if C<undef> is |
185 | default proxy (as specified by C<$ENV{http_proxy}>) is used. |
185 | used. |
186 | |
186 | |
187 | C<$scheme> must be either missing or must be C<http> for HTTP. |
187 | C<$scheme> must be either missing or must be C<http> for HTTP. |
|
|
188 | |
|
|
189 | If not specified, then the default proxy is used (see |
|
|
190 | C<AnyEvent::HTTP::set_proxy>). |
188 | |
191 | |
189 | =item body => $string |
192 | =item body => $string |
190 | |
193 | |
191 | The request body, usually empty. Will be sent as-is (future versions of |
194 | The request body, usually empty. Will be sent as-is (future versions of |
192 | this module might offer more options). |
195 | this module might offer more options). |
… | |
… | |
529 | while ( |
532 | while ( |
530 | m{ |
533 | m{ |
531 | \G\s* |
534 | \G\s* |
532 | (?: |
535 | (?: |
533 | expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) |
536 | expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) |
534 | | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )? |
537 | | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )? |
535 | ) |
538 | ) |
536 | }gcxsi |
539 | }gcxsi |
537 | ) { |
540 | ) { |
538 | my $name = $2; |
541 | my $name = $2; |
539 | my $value = $4; |
542 | my $value = $4; |
… | |
… | |
546 | # quoted |
549 | # quoted |
547 | $value = $3; |
550 | $value = $3; |
548 | $value =~ s/\\(.)/$1/gs; |
551 | $value =~ s/\\(.)/$1/gs; |
549 | } |
552 | } |
550 | |
553 | |
551 | push @kv, lc $name, $value; |
554 | push @kv, @kv ? lc $name : $name, $value; |
552 | |
555 | |
553 | last unless /\G\s*;/gc; |
556 | last unless /\G\s*;/gc; |
554 | } |
557 | } |
555 | |
558 | |
556 | last unless @kv; |
559 | last unless @kv; |
… | |
… | |
709 | my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; |
712 | my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; |
710 | |
713 | |
711 | return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" }) |
714 | return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" }) |
712 | if $recurse < 0; |
715 | if $recurse < 0; |
713 | |
716 | |
714 | my $proxy = $arg{proxy} || $PROXY; |
717 | my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY; |
715 | my $timeout = $arg{timeout} || $TIMEOUT; |
718 | my $timeout = $arg{timeout} || $TIMEOUT; |
716 | |
719 | |
717 | my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment |
720 | my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment |
718 | $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?|; |
721 | $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|; |
719 | |
722 | |
720 | $uscheme = lc $uscheme; |
723 | $uscheme = lc $uscheme; |
721 | |
724 | |
722 | my $uport = $uscheme eq "http" ? 80 |
725 | my $uport = $uscheme eq "http" ? 80 |
723 | : $uscheme eq "https" ? 443 |
726 | : $uscheme eq "https" ? 443 |
… | |
… | |
1172 | Sets the default proxy server to use. The proxy-url must begin with a |
1175 | Sets the default proxy server to use. The proxy-url must begin with a |
1173 | string of the form C<http://host:port>, croaks otherwise. |
1176 | string of the form C<http://host:port>, croaks otherwise. |
1174 | |
1177 | |
1175 | To clear an already-set proxy, use C<undef>. |
1178 | To clear an already-set proxy, use C<undef>. |
1176 | |
1179 | |
|
|
1180 | When AnyEvent::HTTP is laoded for the first time it will query the |
|
|
1181 | default proxy from the operating system, currently by looking at |
|
|
1182 | C<$ENV{http_proxy>}. |
|
|
1183 | |
1177 | =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
1184 | =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
1178 | |
1185 | |
1179 | Remove all cookies from the cookie jar that have been expired. If |
1186 | Remove all cookies from the cookie jar that have been expired. If |
1180 | C<$session_end> is given and true, then additionally remove all session |
1187 | C<$session_end> is given and true, then additionally remove all session |
1181 | cookies. |
1188 | cookies. |
… | |
… | |
1325 | # initialise proxy from environment |
1332 | # initialise proxy from environment |
1326 | eval { |
1333 | eval { |
1327 | set_proxy $ENV{http_proxy}; |
1334 | set_proxy $ENV{http_proxy}; |
1328 | }; |
1335 | }; |
1329 | |
1336 | |
|
|
1337 | =head2 SHOWCASE |
|
|
1338 | |
|
|
1339 | This section contaisn some more elaborate "real-world" examples or code |
|
|
1340 | snippets. |
|
|
1341 | |
|
|
1342 | =head2 HTTP/1.1 FILE DOWNLOAD |
|
|
1343 | |
|
|
1344 | Downloading files with HTTP can be quite tricky, especially when something |
|
|
1345 | goes wrong and you want to resume. |
|
|
1346 | |
|
|
1347 | Here is a function that initiates and resumes a download. It uses the |
|
|
1348 | last modified time to check for file content changes, and works with many |
|
|
1349 | HTTP/1.0 servers as well, and usually falls back to a complete re-download |
|
|
1350 | on older servers. |
|
|
1351 | |
|
|
1352 | It calls the completion callback with either C<undef>, which means a |
|
|
1353 | nonretryable error occured, C<0> when the download was partial and should |
|
|
1354 | be retried, and C<1> if it was successful. |
|
|
1355 | |
|
|
1356 | use AnyEvent::HTTP; |
|
|
1357 | |
|
|
1358 | sub download($$$) { |
|
|
1359 | my ($url, $file, $cb) = @_; |
|
|
1360 | |
|
|
1361 | open my $fh, "+<", $file |
|
|
1362 | or die "$file: $!"; |
|
|
1363 | |
|
|
1364 | my %hdr; |
|
|
1365 | my $ofs = 0; |
|
|
1366 | |
|
|
1367 | warn stat $fh; |
|
|
1368 | warn -s _; |
|
|
1369 | if (stat $fh and -s _) { |
|
|
1370 | $ofs = -s _; |
|
|
1371 | warn "-s is ", $ofs;#d# |
|
|
1372 | $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; |
|
|
1373 | $hdr{"range"} = "bytes=$ofs-"; |
|
|
1374 | } |
|
|
1375 | |
|
|
1376 | http_get $url, |
|
|
1377 | headers => \%hdr, |
|
|
1378 | on_header => sub { |
|
|
1379 | my ($hdr) = @_; |
|
|
1380 | |
|
|
1381 | if ($hdr->{Status} == 200 && $ofs) { |
|
|
1382 | # resume failed |
|
|
1383 | truncate $fh, $ofs = 0; |
|
|
1384 | } |
|
|
1385 | |
|
|
1386 | sysseek $fh, $ofs, 0; |
|
|
1387 | |
|
|
1388 | 1 |
|
|
1389 | }, |
|
|
1390 | on_body => sub { |
|
|
1391 | my ($data, $hdr) = @_; |
|
|
1392 | |
|
|
1393 | if ($hdr->{Status} =~ /^2/) { |
|
|
1394 | length $data == syswrite $fh, $data |
|
|
1395 | or return; # abort on write errors |
|
|
1396 | } |
|
|
1397 | |
|
|
1398 | 1 |
|
|
1399 | }, |
|
|
1400 | sub { |
|
|
1401 | my (undef, $hdr) = @_; |
|
|
1402 | |
|
|
1403 | my $status = $hdr->{Status}; |
|
|
1404 | |
|
|
1405 | if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { |
|
|
1406 | utime $fh, $time, $time; |
|
|
1407 | } |
|
|
1408 | |
|
|
1409 | if ($status == 200 || $status == 206 || $status == 416) { |
|
|
1410 | # download ok || resume ok || file already fully downloaded |
|
|
1411 | $cb->(1, $hdr); |
|
|
1412 | |
|
|
1413 | } elsif ($status == 412) { |
|
|
1414 | # file has changed while resuming, delete and retry |
|
|
1415 | unlink $file; |
|
|
1416 | $cb->(0, $hdr); |
|
|
1417 | |
|
|
1418 | } elsif ($status == 500 or $status == 503 or $status =~ /^59/) { |
|
|
1419 | # retry later |
|
|
1420 | $cb->(0, $hdr); |
|
|
1421 | |
|
|
1422 | } else { |
|
|
1423 | $cb->(undef, $hdr); |
|
|
1424 | } |
|
|
1425 | } |
|
|
1426 | ; |
|
|
1427 | } |
|
|
1428 | |
|
|
1429 | download "http://server/somelargefile", "/tmp/somelargefile", sub { |
|
|
1430 | if ($_[0]) { |
|
|
1431 | print "OK!\n"; |
|
|
1432 | } elsif (defined $_[0]) { |
|
|
1433 | print "please retry later\n"; |
|
|
1434 | } else { |
|
|
1435 | print "ERROR\n"; |
|
|
1436 | } |
|
|
1437 | }; |
|
|
1438 | |
1330 | =head2 SOCKS PROXIES |
1439 | =head3 SOCKS PROXIES |
1331 | |
1440 | |
1332 | Socks proxies are not directly supported by AnyEvent::HTTP. You can |
1441 | Socks proxies are not directly supported by AnyEvent::HTTP. You can |
1333 | compile your perl to support socks, or use an external program such as |
1442 | compile your perl to support socks, or use an external program such as |
1334 | F<socksify> (dante) or F<tsocks> to make your program use a socks proxy |
1443 | F<socksify> (dante) or F<tsocks> to make your program use a socks proxy |
1335 | transparently. |
1444 | transparently. |