ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.117 by root, Mon Sep 9 21:41:43 2013 UTC vs.
Revision 1.126 by root, Thu Jan 7 13:14:16 2016 UTC

46use AnyEvent::Util (); 46use AnyEvent::Util ();
47use AnyEvent::Handle (); 47use AnyEvent::Handle ();
48 48
49use base Exporter::; 49use base Exporter::;
50 50
51our $VERSION = '2.15'; 51our $VERSION = 2.22;
52 52
53our @EXPORT = qw(http_get http_post http_head http_request); 53our @EXPORT = qw(http_get http_post http_head http_request);
54 54
55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 55our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56our $MAX_RECURSE = 10; 56our $MAX_RECURSE = 10;
157=item recurse => $count (default: $MAX_RECURSE) 157=item recurse => $count (default: $MAX_RECURSE)
158 158
159Whether to recurse requests or not, e.g. on redirects, authentication and 159Whether to recurse requests or not, e.g. on redirects, authentication and
160other retries and so on, and how often to do so. 160other retries and so on, and how often to do so.
161 161
162Only redirects to http and https URLs are supported. While most common
163redirection forms are handled entirely within this module, some require
164the use of the optional L<URI> module. If it is required but missing, then
165the request will fail with an error.
166
162=item headers => hashref 167=item headers => hashref
163 168
164The request headers to use. Currently, C<http_request> may provide its own 169The request headers to use. Currently, C<http_request> may provide its own
165C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and 170C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and
166will provide defaults at least for C<TE:>, C<Referer:> and C<User-Agent:> 171will provide defaults at least for C<TE:>, C<Referer:> and C<User-Agent:>
189 194
190C<$scheme> must be either missing or must be C<http> for HTTP. 195C<$scheme> must be either missing or must be C<http> for HTTP.
191 196
192If not specified, then the default proxy is used (see 197If not specified, then the default proxy is used (see
193C<AnyEvent::HTTP::set_proxy>). 198C<AnyEvent::HTTP::set_proxy>).
199
200Currently, if your proxy requires authorization, you have to specify an
201appropriate "Proxy-Authorization" header in every request.
194 202
195=item body => $string 203=item body => $string
196 204
197The request body, usually empty. Will be sent as-is (future versions of 205The request body, usually empty. Will be sent as-is (future versions of
198this module might offer more options). 206this module might offer more options).
765 773
766 my $uport = $uscheme eq "http" ? 80 774 my $uport = $uscheme eq "http" ? 80
767 : $uscheme eq "https" ? 443 775 : $uscheme eq "https" ? 443
768 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); 776 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
769 777
770 $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 778 $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x
771 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); 779 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
772 780
773 my $uhost = lc $1; 781 my $uhost = lc $1;
774 $uport = $2 if defined $2; 782 $uport = $2 if defined $2;
775 783
821 my $was_persistent; # true if this is actually a recycled connection 829 my $was_persistent; # true if this is actually a recycled connection
822 830
823 # the key to use in the keepalive cache 831 # the key to use in the keepalive cache
824 my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}"; 832 my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}";
825 833
826 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive " : "" : "close ") . "Te"; #1.1 834 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive, " : "" : "close, ") . "Te"; #1.1
827 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 835 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
828 836
829 my %state = (connect_guard => 1); 837 my %state = (connect_guard => 1);
830 838
831 my $ae_error = 595; # connecting 839 my $ae_error = 595; # connecting
841 # send request 849 # send request
842 $hdl->push_write ( 850 $hdl->push_write (
843 "$method $rpath HTTP/1.1\015\012" 851 "$method $rpath HTTP/1.1\015\012"
844 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) 852 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
845 . "\015\012" 853 . "\015\012"
846 . (delete $arg{body}) 854 . $arg{body}
847 ); 855 );
848 856
849 # return if error occurred during push_write() 857 # return if error occurred during push_write()
850 return unless %state; 858 return unless %state;
851 859
881 889
882 %hdr = (%$hdr, @pseudo); 890 %hdr = (%$hdr, @pseudo);
883 } 891 }
884 892
885 # redirect handling 893 # redirect handling
886 # microsoft and other shitheads don't give a shit for following standards, 894 # relative uri handling forced by microsoft and other shitheads.
887 # try to support some common forms of broken Location headers. 895 # we give our best and fall back to URI if available.
888 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) { 896 if (exists $hdr{location}) {
897 my $loc = $hdr{location};
898
899 if ($loc =~ m%^//%) { # //
900 $loc = "$uscheme:$loc";
901
902 } elsif ($loc eq "") {
903 $loc = $url;
904
905 } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple"
889 $hdr{location} =~ s/^\.\/+//; 906 $loc =~ s/^\.\/+//;
890 907
908 if ($loc !~ m%^[.?#]%) {
891 my $url = "$rscheme://$uhost:$uport"; 909 my $prefix = "$uscheme://$uhost:$uport";
892 910
893 unless ($hdr{location} =~ s/^\///) { 911 unless ($loc =~ s/^\///) {
894 $url .= $upath; 912 $prefix .= $upath;
895 $url =~ s/\/[^\/]*$//; 913 $prefix =~ s/\/[^\/]*$//;
914 }
915
916 $loc = "$prefix/$loc";
917
918 } elsif (eval { require URI }) { # uri
919 $loc = URI->new_abs ($loc, $url)->as_string;
920
921 } else {
922 return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" };
923 #$hdr{Status} = 599;
924 #$hdr{Reason} = "Unparsable Redirect (URI module missing)";
925 #$recurse = 0;
926 }
896 } 927 }
897 928
898 $hdr{location} = "$url/$hdr{location}"; 929 $hdr{location} = $loc;
899 } 930 }
900 931
901 my $redirect; 932 my $redirect;
902 933
903 if ($recurse) { 934 if ($recurse) {
905 936
906 # industry standard is to redirect POST as GET for 937 # industry standard is to redirect POST as GET for
907 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1. 938 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
908 # also, the UA should ask the user for 301 and 307 and POST, 939 # also, the UA should ask the user for 301 and 307 and POST,
909 # industry standard seems to be to simply follow. 940 # industry standard seems to be to simply follow.
910 # we go with the industry standard. 941 # we go with the industry standard. 308 is defined
942 # by rfc7538
911 if ($status == 301 or $status == 302 or $status == 303) { 943 if ($status == 301 or $status == 302 or $status == 303) {
944 $redirect = 1;
912 # HTTP/1.1 is unclear on how to mutate the method 945 # HTTP/1.1 is unclear on how to mutate the method
913 $method = "GET" unless $method eq "HEAD"; 946 unless ($method eq "HEAD") {
914 $redirect = 1; 947 $method = "GET";
948 delete $arg{body};
949 }
915 } elsif ($status == 307) { 950 } elsif ($status == 307 or $status == 308) {
916 $redirect = 1; 951 $redirect = 1;
917 } 952 }
918 } 953 }
919 954
920 my $finish = sub { # ($data, $err_status, $err_reason[, $persistent]) 955 my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
1088 _destroy_state %state; 1123 _destroy_state %state;
1089 1124
1090 %state = (); 1125 %state = ();
1091 $state{recurse} = 1126 $state{recurse} =
1092 http_request ( 1127 http_request (
1093 $method => $url, 1128 $method => $url,
1094 %arg, 1129 %arg,
1095 recurse => $recurse - 1, 1130 recurse => $recurse - 1,
1096 keepalive => 0, 1131 persistent => 0,
1097 sub { 1132 sub {
1098 %state = (); 1133 %state = ();
1099 &$cb 1134 &$cb
1100 } 1135 }
1101 ); 1136 );
1147 1182
1148 # now handle proxy-CONNECT method 1183 # now handle proxy-CONNECT method
1149 if ($proxy && $uscheme eq "https") { 1184 if ($proxy && $uscheme eq "https") {
1150 # oh dear, we have to wrap it into a connect request 1185 # oh dear, we have to wrap it into a connect request
1151 1186
1187 my $auth = exists $hdr{"proxy-authorization"}
1188 ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012"
1189 : "";
1190
1152 # maybe re-use $uauthority with patched port? 1191 # maybe re-use $uauthority with patched port?
1153 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012"); 1192 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012");
1154 $state{handle}->push_read (line => $qr_nlnl, sub { 1193 $state{handle}->push_read (line => $qr_nlnl, sub {
1155 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 1194 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
1156 or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }; 1195 or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" };
1157 1196
1158 if ($2 == 200) { 1197 if ($2 == 200) {
1161 } else { 1200 } else {
1162 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 }; 1201 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 };
1163 } 1202 }
1164 }); 1203 });
1165 } else { 1204 } else {
1205 delete $hdr{"proxy-authorization"} unless $proxy;
1206
1166 $handle_actual_request->(); 1207 $handle_actual_request->();
1167 } 1208 }
1168 }; 1209 };
1169 1210
1170 _get_slot $uhost, sub { 1211 _get_slot $uhost, sub {
1248save cookies to disk, and you should call this function after loading them 1289save cookies to disk, and you should call this function after loading them
1249again. If you have a long-running program you can additionally call this 1290again. If you have a long-running program you can additionally call this
1250function from time to time. 1291function from time to time.
1251 1292
1252A cookie jar is initially an empty hash-reference that is managed by this 1293A cookie jar is initially an empty hash-reference that is managed by this
1253module. It's format is subject to change, but currently it is like this: 1294module. Its format is subject to change, but currently it is as follows:
1254 1295
1255The key C<version> has to contain C<1>, otherwise the hash gets 1296The key C<version> has to contain C<1>, otherwise the hash gets
1256emptied. All other keys are hostnames or IP addresses pointing to 1297emptied. All other keys are hostnames or IP addresses pointing to
1257hash-references. The key for these inner hash references is the 1298hash-references. The key for these inner hash references is the
1258server path for which this cookie is meant, and the values are again 1299server path for which this cookie is meant, and the values are again
1303C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). 1344C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
1304 1345
1305=item $AnyEvent::HTTP::MAX_PER_HOST 1346=item $AnyEvent::HTTP::MAX_PER_HOST
1306 1347
1307The maximum number of concurrent connections to the same host (identified 1348The maximum number of concurrent connections to the same host (identified
1308by the hostname). If the limit is exceeded, then the additional requests 1349by the hostname). If the limit is exceeded, then additional requests
1309are queued until previous connections are closed. Both persistent and 1350are queued until previous connections are closed. Both persistent and
1310non-persistent connections are counted in this limit. 1351non-persistent connections are counted in this limit.
1311 1352
1312The default value for this is C<4>, and it is highly advisable to not 1353The default value for this is C<4>, and it is highly advisable to not
1313increase it much. 1354increase it much.

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines