ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.92 by root, Tue Jan 4 08:17:59 2011 UTC vs.
Revision 1.104 by root, Thu Feb 24 15:09:03 2011 UTC

15This module is an L<AnyEvent> user, you need to make sure that you use and 15This module is an L<AnyEvent> user, you need to make sure that you use and
16run a supported event loop. 16run a supported event loop.
17 17
18This module implements a simple, stateless and non-blocking HTTP 18This module implements a simple, stateless and non-blocking HTTP
19client. It supports GET, POST and other request methods, cookies and more, 19client. It supports GET, POST and other request methods, cookies and more,
20all on a very low level. It can follow redirects supports proxies and 20all on a very low level. It can follow redirects, supports proxies, and
21automatically limits the number of connections to the values specified in 21automatically limits the number of connections to the values specified in
22the RFC. 22the RFC.
23 23
24It should generally be a "good client" that is enough for most HTTP 24It should generally be a "good client" that is enough for most HTTP
25tasks. Simple tasks should be simple, but complex tasks should still be 25tasks. Simple tasks should be simple, but complex tasks should still be
46use AnyEvent::Util (); 46use AnyEvent::Util ();
47use AnyEvent::Handle (); 47use AnyEvent::Handle ();
48 48
49use base Exporter::; 49use base Exporter::;
50 50
51our $VERSION = '2.0'; 51our $VERSION = '2.1';
52 52
53our @EXPORT = qw(http_get http_post http_head http_request); 53our @EXPORT = qw(http_get http_post http_head http_request);
54 54
55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56our $MAX_RECURSE = 10; 56our $MAX_RECURSE = 10;
169 169
170You really should provide your own C<User-Agent:> header value that is 170You really should provide your own C<User-Agent:> header value that is
171appropriate for your program - I wouldn't be surprised if the default 171appropriate for your program - I wouldn't be surprised if the default
172AnyEvent string gets blocked by webservers sooner or later. 172AnyEvent string gets blocked by webservers sooner or later.
173 173
174Also, make sure that your header names and values do not contain any
175embedded newlines.
176
174=item timeout => $seconds 177=item timeout => $seconds
175 178
176The time-out to use for various stages - each connect attempt will reset 179The time-out to use for various stages - each connect attempt will reset
177the timeout, as will read or write activity, i.e. this is not an overall 180the timeout, as will read or write activity, i.e. this is not an overall
178timeout. 181timeout.
179 182
180Default timeout is 5 minutes. 183Default timeout is 5 minutes.
181 184
182=item proxy => [$host, $port[, $scheme]] or undef 185=item proxy => [$host, $port[, $scheme]] or undef
183 186
184Use the given http proxy for all requests. If not specified, then the 187Use the given http proxy for all requests, or no proxy if C<undef> is
185default proxy (as specified by C<$ENV{http_proxy}>) is used. 188used.
186 189
187C<$scheme> must be either missing or must be C<http> for HTTP. 190C<$scheme> must be either missing or must be C<http> for HTTP.
191
192If not specified, then the default proxy is used (see
193C<AnyEvent::HTTP::set_proxy>).
188 194
189=item body => $string 195=item body => $string
190 196
191The request body, usually empty. Will be sent as-is (future versions of 197The request body, usually empty. Will be sent as-is (future versions of
192this module might offer more options). 198this module might offer more options).
529 while ( 535 while (
530 m{ 536 m{
531 \G\s* 537 \G\s*
532 (?: 538 (?:
533 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) 539 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
534 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )? 540 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )?
535 ) 541 )
536 }gcxsi 542 }gcxsi
537 ) { 543 ) {
538 my $name = $2; 544 my $name = $2;
539 my $value = $4; 545 my $value = $4;
546 # quoted 552 # quoted
547 $value = $3; 553 $value = $3;
548 $value =~ s/\\(.)/$1/gs; 554 $value =~ s/\\(.)/$1/gs;
549 } 555 }
550 556
551 push @kv, lc $name, $value; 557 push @kv, @kv ? lc $name : $name, $value;
552 558
553 last unless /\G\s*;/gc; 559 last unless /\G\s*;/gc;
554 } 560 }
555 561
556 last unless @kv; 562 last unless @kv;
709 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; 715 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
710 716
711 return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" }) 717 return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" })
712 if $recurse < 0; 718 if $recurse < 0;
713 719
714 my $proxy = $arg{proxy} || $PROXY; 720 my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY;
715 my $timeout = $arg{timeout} || $TIMEOUT; 721 my $timeout = $arg{timeout} || $TIMEOUT;
716 722
717 my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment 723 my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment
718 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?|; 724 $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|;
719 725
720 $uscheme = lc $uscheme; 726 $uscheme = lc $uscheme;
721 727
722 my $uport = $uscheme eq "http" ? 80 728 my $uport = $uscheme eq "http" ? 80
723 : $uscheme eq "https" ? 443 729 : $uscheme eq "https" ? 443
770 if length $arg{body} || $method ne "GET"; 776 if length $arg{body} || $method ne "GET";
771 777
772 my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/; 778 my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/;
773 779
774 # default value for keepalive is true iff the request is for an idempotent method 780 # default value for keepalive is true iff the request is for an idempotent method
775 my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : $idempotent; 781 my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent;
776 my $keepalive10 = exists $arg{keepalive10} ? $arg{keepalive10} : !$proxy; 782 my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy;
777 my $keptalive; # true if this is actually a recycled connection 783 my $was_persistent; # true if this is actually a recycled connection
778 784
779 # the key to use in the keepalive cache 785 # the key to use in the keepalive cache
780 my $ka_key = "$uhost\x00$arg{sessionid}"; 786 my $ka_key = "$uhost\x00$arg{sessionid}";
781 787
782 $hdr{connection} = ($keepalive ? $keepalive10 ? "keep-alive " : "" : "close ") . "Te"; #1.1 788 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive " : "" : "close ") . "Te"; #1.1
783 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 789 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
784 790
785 my %state = (connect_guard => 1); 791 my %state = (connect_guard => 1);
786 792
787 my $ae_error = 595; # connecting 793 my $ae_error = 595; # connecting
871 } elsif ($status == 307) { 877 } elsif ($status == 307) {
872 $redirect = 1; 878 $redirect = 1;
873 } 879 }
874 } 880 }
875 881
876 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) 882 my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
877 if ($state{handle}) { 883 if ($state{handle}) {
878 # handle keepalive 884 # handle keepalive
879 if ( 885 if (
880 $keepalive 886 $persistent
881 && $_[3] 887 && $_[3]
882 && ($hdr{HTTPVersion} < 1.1 888 && ($hdr{HTTPVersion} < 1.1
883 ? $hdr{connection} =~ /\bkeep-?alive\b/i 889 ? $hdr{connection} =~ /\bkeep-?alive\b/i
884 : $hdr{connection} !~ /\bclose\b/i) 890 : $hdr{connection} !~ /\bclose\b/i)
885 ) { 891 ) {
904 910
905 if ($redirect && exists $hdr{location}) { 911 if ($redirect && exists $hdr{location}) {
906 # we ignore any errors, as it is very common to receive 912 # we ignore any errors, as it is very common to receive
907 # Content-Length != 0 but no actual body 913 # Content-Length != 0 but no actual body
908 # we also access %hdr, as $_[1] might be an erro 914 # we also access %hdr, as $_[1] might be an erro
915 $state{recurse} =
909 http_request ( 916 http_request (
910 $method => $hdr{location}, 917 $method => $hdr{location},
911 %arg, 918 %arg,
912 recurse => $recurse - 1, 919 recurse => $recurse - 1,
913 Redirect => [$_[0], \%hdr], 920 Redirect => [$_[0], \%hdr],
921 sub {
922 %state = ();
914 $cb 923 &$cb
924 },
915 ); 925 );
916 } else { 926 } else {
917 $cb->($_[0], \%hdr); 927 $cb->($_[0], \%hdr);
918 } 928 }
919 }; 929 };
920 930
1032 } 1042 }
1033 }; 1043 };
1034 1044
1035 # if keepalive is enabled, then the server closing the connection 1045 # if keepalive is enabled, then the server closing the connection
1036 # before a response can happen legally - we retry on idempotent methods. 1046 # before a response can happen legally - we retry on idempotent methods.
1037 if ($keptalive && $idempotent) { 1047 if ($was_persistent && $idempotent) {
1038 my $old_eof = $hdl->{on_eof}; 1048 my $old_eof = $hdl->{on_eof};
1039 $hdl->{on_eof} = sub { 1049 $hdl->{on_eof} = sub {
1040 _destroy_state %state; 1050 _destroy_state %state;
1041 1051
1052 %state = ();
1053 $state{recurse} =
1042 http_request ( 1054 http_request (
1043 $method => $url, 1055 $method => $url,
1044 %arg, 1056 %arg,
1045 keepalive => 0, 1057 keepalive => 0,
1058 sub {
1059 %state = ();
1046 $cb 1060 &$cb
1061 }
1047 ); 1062 );
1048 }; 1063 };
1049 $hdl->on_read (sub { 1064 $hdl->on_read (sub {
1050 return unless %state; 1065 return unless %state;
1051 1066
1052 # as soon as we receive something, a connection close 1067 # as soon as we receive something, a connection close
1060 }; 1075 };
1061 1076
1062 my $prepare_handle = sub { 1077 my $prepare_handle = sub {
1063 my ($hdl) = $state{handle}; 1078 my ($hdl) = $state{handle};
1064 1079
1065 $hdl->timeout ($timeout);
1066 $hdl->on_error (sub { 1080 $hdl->on_error (sub {
1067 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] }; 1081 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] };
1068 }); 1082 });
1069 $hdl->on_eof (sub { 1083 $hdl->on_eof (sub {
1070 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }; 1084 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" };
1071 }); 1085 });
1086 $hdl->timeout_reset;
1087 $hdl->timeout ($timeout);
1072 }; 1088 };
1073 1089
1074 # connected to proxy (or origin server) 1090 # connected to proxy (or origin server)
1075 my $connect_cb = sub { 1091 my $connect_cb = sub {
1076 my $fh = shift 1092 my $fh = shift
1117 1133
1118 return unless $state{connect_guard}; 1134 return unless $state{connect_guard};
1119 1135
1120 # try to use an existing keepalive connection, but only if we, ourselves, plan 1136 # try to use an existing keepalive connection, but only if we, ourselves, plan
1121 # on a keepalive request (in theory, this should be a separate config option). 1137 # on a keepalive request (in theory, this should be a separate config option).
1122 if ($keepalive && $KA_CACHE{$ka_key}) { 1138 if ($persistent && $KA_CACHE{$ka_key}) {
1123 $keptalive = 1; 1139 $was_persistent = 1;
1140
1124 $state{handle} = ka_fetch $ka_key; 1141 $state{handle} = ka_fetch $ka_key;
1142 $state{handle}->destroyed
1143 and die "got a destructed habndle. pah\n";#d#
1125 $prepare_handle->(); 1144 $prepare_handle->();
1145 $state{handle}->destroyed
1146 and die "got a destructed habndle. pa2\n";#d#
1126 $handle_actual_request->(); 1147 $handle_actual_request->();
1148 $state{handle}->destroyed
1149 and die "got a destructed habndle. pa3\n";#d#
1127 1150
1128 } else { 1151 } else {
1129 my $tcp_connect = $arg{tcp_connect} 1152 my $tcp_connect = $arg{tcp_connect}
1130 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 1153 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1131 1154
1171 1194
1172Sets the default proxy server to use. The proxy-url must begin with a 1195Sets the default proxy server to use. The proxy-url must begin with a
1173string of the form C<http://host:port>, croaks otherwise. 1196string of the form C<http://host:port>, croaks otherwise.
1174 1197
1175To clear an already-set proxy, use C<undef>. 1198To clear an already-set proxy, use C<undef>.
1199
1200When AnyEvent::HTTP is loaded for the first time it will query the
1201default proxy from the operating system, currently by looking at
1202C<$ENV{http_proxy}>.
1176 1203
1177=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] 1204=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1178 1205
1179Remove all cookies from the cookie jar that have been expired. If 1206Remove all cookies from the cookie jar that have been expired. If
1180C<$session_end> is given and true, then additionally remove all session 1207C<$session_end> is given and true, then additionally remove all session
1325# initialise proxy from environment 1352# initialise proxy from environment
1326eval { 1353eval {
1327 set_proxy $ENV{http_proxy}; 1354 set_proxy $ENV{http_proxy};
1328}; 1355};
1329 1356
1357=head2 SHOWCASE
1358
1359This section contains some more elaborate "real-world" examples or code
1360snippets.
1361
1362=head2 HTTP/1.1 FILE DOWNLOAD
1363
1364Downloading files with HTTP can be quite tricky, especially when something
1365goes wrong and you want to resume.
1366
1367Here is a function that initiates and resumes a download. It uses the
1368last modified time to check for file content changes, and works with many
1369HTTP/1.0 servers as well, and usually falls back to a complete re-download
1370on older servers.
1371
1372It calls the completion callback with either C<undef>, which means a
1373nonretryable error occurred, C<0> when the download was partial and should
1374be retried, and C<1> if it was successful.
1375
1376 use AnyEvent::HTTP;
1377
1378 sub download($$$) {
1379 my ($url, $file, $cb) = @_;
1380
1381 open my $fh, "+<", $file
1382 or die "$file: $!";
1383
1384 my %hdr;
1385 my $ofs = 0;
1386
1387 warn stat $fh;
1388 warn -s _;
1389 if (stat $fh and -s _) {
1390 $ofs = -s _;
1391 warn "-s is ", $ofs;#d#
1392 $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1393 $hdr{"range"} = "bytes=$ofs-";
1394 }
1395
1396 http_get $url,
1397 headers => \%hdr,
1398 on_header => sub {
1399 my ($hdr) = @_;
1400
1401 if ($hdr->{Status} == 200 && $ofs) {
1402 # resume failed
1403 truncate $fh, $ofs = 0;
1404 }
1405
1406 sysseek $fh, $ofs, 0;
1407
1408 1
1409 },
1410 on_body => sub {
1411 my ($data, $hdr) = @_;
1412
1413 if ($hdr->{Status} =~ /^2/) {
1414 length $data == syswrite $fh, $data
1415 or return; # abort on write errors
1416 }
1417
1418 1
1419 },
1420 sub {
1421 my (undef, $hdr) = @_;
1422
1423 my $status = $hdr->{Status};
1424
1425 if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
1426 utime $fh, $time, $time;
1427 }
1428
1429 if ($status == 200 || $status == 206 || $status == 416) {
1430 # download ok || resume ok || file already fully downloaded
1431 $cb->(1, $hdr);
1432
1433 } elsif ($status == 412) {
1434 # file has changed while resuming, delete and retry
1435 unlink $file;
1436 $cb->(0, $hdr);
1437
1438 } elsif ($status == 500 or $status == 503 or $status =~ /^59/) {
1439 # retry later
1440 $cb->(0, $hdr);
1441
1442 } else {
1443 $cb->(undef, $hdr);
1444 }
1445 }
1446 ;
1447 }
1448
1449 download "http://server/somelargefile", "/tmp/somelargefile", sub {
1450 if ($_[0]) {
1451 print "OK!\n";
1452 } elsif (defined $_[0]) {
1453 print "please retry later\n";
1454 } else {
1455 print "ERROR\n";
1456 }
1457 };
1458
1330=head2 SOCKS PROXIES 1459=head3 SOCKS PROXIES
1331 1460
1332Socks proxies are not directly supported by AnyEvent::HTTP. You can 1461Socks proxies are not directly supported by AnyEvent::HTTP. You can
1333compile your perl to support socks, or use an external program such as 1462compile your perl to support socks, or use an external program such as
1334F<socksify> (dante) or F<tsocks> to make your program use a socks proxy 1463F<socksify> (dante) or F<tsocks> to make your program use a socks proxy
1335transparently. 1464transparently.

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines