ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.102 by root, Sat Feb 19 06:46:14 2011 UTC vs.
Revision 1.116 by root, Fri May 17 07:19:23 2013 UTC

46use AnyEvent::Util (); 46use AnyEvent::Util ();
47use AnyEvent::Handle (); 47use AnyEvent::Handle ();
48 48
49use base Exporter::; 49use base Exporter::;
50 50
51our $VERSION = '2.04'; 51our $VERSION = '2.15';
52 52
53our @EXPORT = qw(http_get http_post http_head http_request); 53our @EXPORT = qw(http_get http_post http_head http_request);
54 54
55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56our $MAX_RECURSE = 10; 56our $MAX_RECURSE = 10;
123C<590>-C<599> and the C<Reason> pseudo-header will contain an error 123C<590>-C<599> and the C<Reason> pseudo-header will contain an error
124message. Currently the following status codes are used: 124message. Currently the following status codes are used:
125 125
126=over 4 126=over 4
127 127
128=item 595 - errors during connection etsbalishment, proxy handshake. 128=item 595 - errors during connection establishment, proxy handshake.
129 129
130=item 596 - errors during TLS negotiation, request sending and header processing. 130=item 596 - errors during TLS negotiation, request sending and header processing.
131 131
132=item 597 - errors during body receiving or processing. 132=item 597 - errors during body receiving or processing.
133 133
154 154
155=over 4 155=over 4
156 156
157=item recurse => $count (default: $MAX_RECURSE) 157=item recurse => $count (default: $MAX_RECURSE)
158 158
159Whether to recurse requests or not, e.g. on redirects, authentication 159Whether to recurse requests or not, e.g. on redirects, authentication and
160retries and so on, and how often to do so. 160other retries and so on, and how often to do so.
161 161
162=item headers => hashref 162=item headers => hashref
163 163
164The request headers to use. Currently, C<http_request> may provide its own 164The request headers to use. Currently, C<http_request> may provide its own
165C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and 165C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and
168they won't be sent at all). 168they won't be sent at all).
169 169
170You really should provide your own C<User-Agent:> header value that is 170You really should provide your own C<User-Agent:> header value that is
171appropriate for your program - I wouldn't be surprised if the default 171appropriate for your program - I wouldn't be surprised if the default
172AnyEvent string gets blocked by webservers sooner or later. 172AnyEvent string gets blocked by webservers sooner or later.
173
174Also, make sure that your header names and values do not contain any
175embedded newlines.
173 176
174=item timeout => $seconds 177=item timeout => $seconds
175 178
176The time-out to use for various stages - each connect attempt will reset 179The time-out to use for various stages - each connect attempt will reset
177the timeout, as will read or write activity, i.e. this is not an overall 180the timeout, as will read or write activity, i.e. this is not an overall
381 384
382Example: do a HTTP HEAD request on https://www.google.com/, use a 385Example: do a HTTP HEAD request on https://www.google.com/, use a
383timeout of 30 seconds. 386timeout of 30 seconds.
384 387
385 http_request 388 http_request
386 GET => "https://www.google.com", 389 HEAD => "https://www.google.com",
387 headers => { "user-agent" => "MySearchClient 1.0" }, 390 headers => { "user-agent" => "MySearchClient 1.0" },
388 timeout => 30, 391 timeout => 30,
389 sub { 392 sub {
390 my ($body, $hdr) = @_; 393 my ($body, $hdr) = @_;
391 use Data::Dumper; 394 use Data::Dumper;
686 689
687 $cb->(undef, $hdr); 690 $cb->(undef, $hdr);
688 () 691 ()
689} 692}
690 693
694our %IDEMPOTENT = (
695 DELETE => 1,
696 GET => 1,
697 HEAD => 1,
698 OPTIONS => 1,
699 PUT => 1,
700 TRACE => 1,
701
702 ACL => 1,
703 "BASELINE-CONTROL" => 1,
704 BIND => 1,
705 CHECKIN => 1,
706 CHECKOUT => 1,
707 COPY => 1,
708 LABEL => 1,
709 LINK => 1,
710 MERGE => 1,
711 MKACTIVITY => 1,
712 MKCALENDAR => 1,
713 MKCOL => 1,
714 MKREDIRECTREF => 1,
715 MKWORKSPACE => 1,
716 MOVE => 1,
717 ORDERPATCH => 1,
718 PROPFIND => 1,
719 PROPPATCH => 1,
720 REBIND => 1,
721 REPORT => 1,
722 SEARCH => 1,
723 UNBIND => 1,
724 UNCHECKOUT => 1,
725 UNLINK => 1,
726 UNLOCK => 1,
727 UPDATE => 1,
728 UPDATEREDIRECTREF => 1,
729 "VERSION-CONTROL" => 1,
730);
731
691sub http_request($$@) { 732sub http_request($$@) {
692 my $cb = pop; 733 my $cb = pop;
693 my ($method, $url, %arg) = @_; 734 my ($method, $url, %arg) = @_;
694 735
695 my %hdr; 736 my %hdr;
770 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; 811 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
771 812
772 $hdr{"content-length"} = length $arg{body} 813 $hdr{"content-length"} = length $arg{body}
773 if length $arg{body} || $method ne "GET"; 814 if length $arg{body} || $method ne "GET";
774 815
775 my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/; 816 my $idempotent = $IDEMPOTENT{$method};
776 817
777 # default value for keepalive is true iff the request is for an idempotent method 818 # default value for keepalive is true iff the request is for an idempotent method
778 my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : $idempotent; 819 my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent;
779 my $keepalive10 = exists $arg{keepalive10} ? $arg{keepalive10} : !$proxy; 820 my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy;
780 my $keptalive; # true if this is actually a recycled connection 821 my $was_persistent; # true if this is actually a recycled connection
781 822
782 # the key to use in the keepalive cache 823 # the key to use in the keepalive cache
783 my $ka_key = "$uhost\x00$arg{sessionid}"; 824 my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}";
784 825
785 $hdr{connection} = ($keepalive ? $keepalive10 ? "keep-alive " : "" : "close ") . "Te"; #1.1 826 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive " : "" : "close ") . "Te"; #1.1
786 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 827 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
787 828
788 my %state = (connect_guard => 1); 829 my %state = (connect_guard => 1);
789 830
790 my $ae_error = 595; # connecting 831 my $ae_error = 595; # connecting
874 } elsif ($status == 307) { 915 } elsif ($status == 307) {
875 $redirect = 1; 916 $redirect = 1;
876 } 917 }
877 } 918 }
878 919
879 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) 920 my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
880 if ($state{handle}) { 921 if ($state{handle}) {
881 # handle keepalive 922 # handle keepalive
882 if ( 923 if (
883 $keepalive 924 $persistent
884 && $_[3] 925 && $_[3]
885 && ($hdr{HTTPVersion} < 1.1 926 && ($hdr{HTTPVersion} < 1.1
886 ? $hdr{connection} =~ /\bkeep-?alive\b/i 927 ? $hdr{connection} =~ /\bkeep-?alive\b/i
887 : $hdr{connection} !~ /\bclose\b/i) 928 : $hdr{connection} !~ /\bclose\b/i)
888 ) { 929 ) {
907 948
908 if ($redirect && exists $hdr{location}) { 949 if ($redirect && exists $hdr{location}) {
909 # we ignore any errors, as it is very common to receive 950 # we ignore any errors, as it is very common to receive
910 # Content-Length != 0 but no actual body 951 # Content-Length != 0 but no actual body
911 # we also access %hdr, as $_[1] might be an error 952 # we also access %hdr, as $_[1] might be an error
953 $state{recurse} =
912 http_request ( 954 http_request (
913 $method => $hdr{location}, 955 $method => $hdr{location},
914 %arg, 956 %arg,
915 recurse => $recurse - 1, 957 recurse => $recurse - 1,
916 Redirect => [$_[0], \%hdr], 958 Redirect => [$_[0], \%hdr],
959 sub {
960 %state = ();
917 $cb 961 &$cb
962 },
918 ); 963 );
919 } else { 964 } else {
920 $cb->($_[0], \%hdr); 965 $cb->($_[0], \%hdr);
921 } 966 }
922 }; 967 };
923 968
955 my $body = ""; 1000 my $body = "";
956 my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; 1001 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
957 1002
958 $state{read_chunk} = sub { 1003 $state{read_chunk} = sub {
959 $_[1] =~ /^([0-9a-fA-F]+)/ 1004 $_[1] =~ /^([0-9a-fA-F]+)/
960 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); 1005 or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
961 1006
962 my $len = hex $1; 1007 my $len = hex $1;
963 1008
964 if ($len) { 1009 if ($len) {
965 $cl += $len; 1010 $cl += $len;
1035 } 1080 }
1036 }; 1081 };
1037 1082
1038 # if keepalive is enabled, then the server closing the connection 1083 # if keepalive is enabled, then the server closing the connection
1039 # before a response can happen legally - we retry on idempotent methods. 1084 # before a response can happen legally - we retry on idempotent methods.
1040 if ($keptalive && $idempotent) { 1085 if ($was_persistent && $idempotent) {
1041 my $old_eof = $hdl->{on_eof}; 1086 my $old_eof = $hdl->{on_eof};
1042 $hdl->{on_eof} = sub { 1087 $hdl->{on_eof} = sub {
1043 _destroy_state %state; 1088 _destroy_state %state;
1044 1089
1090 %state = ();
1091 $state{recurse} =
1045 http_request ( 1092 http_request (
1046 $method => $url, 1093 $method => $url,
1047 %arg, 1094 %arg,
1095 recurse => $recurse - 1,
1048 keepalive => 0, 1096 keepalive => 0,
1097 sub {
1098 %state = ();
1049 $cb 1099 &$cb
1100 }
1050 ); 1101 );
1051 }; 1102 };
1052 $hdl->on_read (sub { 1103 $hdl->on_read (sub {
1053 return unless %state; 1104 return unless %state;
1054 1105
1055 # as soon as we receive something, a connection close 1106 # as soon as we receive something, a connection close
1063 }; 1114 };
1064 1115
1065 my $prepare_handle = sub { 1116 my $prepare_handle = sub {
1066 my ($hdl) = $state{handle}; 1117 my ($hdl) = $state{handle};
1067 1118
1068 $hdl->timeout ($timeout);
1069 $hdl->on_error (sub { 1119 $hdl->on_error (sub {
1070 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] }; 1120 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] };
1071 }); 1121 });
1072 $hdl->on_eof (sub { 1122 $hdl->on_eof (sub {
1073 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }; 1123 _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" };
1074 }); 1124 });
1125 $hdl->timeout_reset;
1126 $hdl->timeout ($timeout);
1075 }; 1127 };
1076 1128
1077 # connected to proxy (or origin server) 1129 # connected to proxy (or origin server)
1078 my $connect_cb = sub { 1130 my $connect_cb = sub {
1079 my $fh = shift 1131 my $fh = shift
1120 1172
1121 return unless $state{connect_guard}; 1173 return unless $state{connect_guard};
1122 1174
1123 # try to use an existing keepalive connection, but only if we, ourselves, plan 1175 # try to use an existing keepalive connection, but only if we, ourselves, plan
1124 # on a keepalive request (in theory, this should be a separate config option). 1176 # on a keepalive request (in theory, this should be a separate config option).
1125 if ($keepalive && $KA_CACHE{$ka_key}) { 1177 if ($persistent && $KA_CACHE{$ka_key}) {
1126 $keptalive = 1; 1178 $was_persistent = 1;
1179
1127 $state{handle} = ka_fetch $ka_key; 1180 $state{handle} = ka_fetch $ka_key;
1181 $state{handle}->destroyed
1182 and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d#
1128 $prepare_handle->(); 1183 $prepare_handle->();
1184 $state{handle}->destroyed
1185 and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d#
1129 $handle_actual_request->(); 1186 $handle_actual_request->();
1130 1187
1131 } else { 1188 } else {
1132 my $tcp_connect = $arg{tcp_connect} 1189 my $tcp_connect = $arg{tcp_connect}
1133 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 1190 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1175Sets the default proxy server to use. The proxy-url must begin with a 1232Sets the default proxy server to use. The proxy-url must begin with a
1176string of the form C<http://host:port>, croaks otherwise. 1233string of the form C<http://host:port>, croaks otherwise.
1177 1234
1178To clear an already-set proxy, use C<undef>. 1235To clear an already-set proxy, use C<undef>.
1179 1236
1180When AnyEvent::HTTP is laoded for the first time it will query the 1237When AnyEvent::HTTP is loaded for the first time it will query the
1181default proxy from the operating system, currently by looking at 1238default proxy from the operating system, currently by looking at
1182C<$ENV{http_proxy}>. 1239C<$ENV{http_proxy}>.
1183 1240
1184=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] 1241=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1185 1242
1197 1254
1198The key C<version> has to contain C<1>, otherwise the hash gets 1255The key C<version> has to contain C<1>, otherwise the hash gets
1199emptied. All other keys are hostnames or IP addresses pointing to 1256emptied. All other keys are hostnames or IP addresses pointing to
1200hash-references. The key for these inner hash references is the 1257hash-references. The key for these inner hash references is the
1201server path for which this cookie is meant, and the values are again 1258server path for which this cookie is meant, and the values are again
1202hash-references. The keys of those hash-references is the cookie name, and 1259hash-references. Each key of those hash-references is a cookie name, and
1203the value, you guessed it, is another hash-reference, this time with the 1260the value, you guessed it, is another hash-reference, this time with the
1204key-value pairs from the cookie, except for C<expires> and C<max-age>, 1261key-value pairs from the cookie, except for C<expires> and C<max-age>,
1205which have been replaced by a C<_expires> key that contains the cookie 1262which have been replaced by a C<_expires> key that contains the cookie
1206expiry timestamp. 1263expiry timestamp. Session cookies are indicated by not having an
1264C<_expires> key.
1207 1265
1208Here is an example of a cookie jar with a single cookie, so you have a 1266Here is an example of a cookie jar with a single cookie, so you have a
1209chance of understanding the above paragraph: 1267chance of understanding the above paragraph:
1210 1268
1211 { 1269 {
1235 1293
1236The default value for the C<recurse> request parameter (default: C<10>). 1294The default value for the C<recurse> request parameter (default: C<10>).
1237 1295
1238=item $AnyEvent::HTTP::TIMEOUT 1296=item $AnyEvent::HTTP::TIMEOUT
1239 1297
1240The default timeout for conenction operations (default: C<300>). 1298The default timeout for connection operations (default: C<300>).
1241 1299
1242=item $AnyEvent::HTTP::USERAGENT 1300=item $AnyEvent::HTTP::USERAGENT
1243 1301
1244The default value for the C<User-Agent> header (the default is 1302The default value for the C<User-Agent> header (the default is
1245C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 1303C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
1259use 6, and Opera uses 8 because like, they have the fastest browser and 1317use 6, and Opera uses 8 because like, they have the fastest browser and
1260give a shit for everybody else on the planet. 1318give a shit for everybody else on the planet.
1261 1319
1262=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT 1320=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
1263 1321
1264The time after which idle persistent conenctions get closed by 1322The time after which idle persistent connections get closed by
1265AnyEvent::HTTP (default: C<3>). 1323AnyEvent::HTTP (default: C<3>).
1266 1324
1267=item $AnyEvent::HTTP::ACTIVE 1325=item $AnyEvent::HTTP::ACTIVE
1268 1326
1269The number of active connections. This is not the number of currently 1327The number of active connections. This is not the number of currently
1310 # other formats fail in the loop below 1368 # other formats fail in the loop below
1311 1369
1312 for (0..11) { 1370 for (0..11) {
1313 if ($m eq $month[$_]) { 1371 if ($m eq $month[$_]) {
1314 require Time::Local; 1372 require Time::Local;
1315 return Time::Local::timegm ($S, $M, $H, $d, $_, $y); 1373 return eval { Time::Local::timegm ($S, $M, $H, $d, $_, $y) };
1316 } 1374 }
1317 } 1375 }
1318 1376
1319 undef 1377 undef
1320} 1378}
1366 1424
1367 warn stat $fh; 1425 warn stat $fh;
1368 warn -s _; 1426 warn -s _;
1369 if (stat $fh and -s _) { 1427 if (stat $fh and -s _) {
1370 $ofs = -s _; 1428 $ofs = -s _;
1371 warn "-s is ", $ofs;#d# 1429 warn "-s is ", $ofs;
1372 $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; 1430 $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1373 $hdr{"range"} = "bytes=$ofs-"; 1431 $hdr{"range"} = "bytes=$ofs-";
1374 } 1432 }
1375 1433
1376 http_get $url, 1434 http_get $url,

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines