… | |
… | |
46 | use AnyEvent::Util (); |
46 | use AnyEvent::Util (); |
47 | use AnyEvent::Handle (); |
47 | use AnyEvent::Handle (); |
48 | |
48 | |
49 | use base Exporter::; |
49 | use base Exporter::; |
50 | |
50 | |
51 | our $VERSION = '2.15'; |
51 | our $VERSION = 2.21; |
52 | |
52 | |
53 | our @EXPORT = qw(http_get http_post http_head http_request); |
53 | our @EXPORT = qw(http_get http_post http_head http_request); |
54 | |
54 | |
55 | our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; |
55 | our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; |
56 | our $MAX_RECURSE = 10; |
56 | our $MAX_RECURSE = 10; |
… | |
… | |
157 | =item recurse => $count (default: $MAX_RECURSE) |
157 | =item recurse => $count (default: $MAX_RECURSE) |
158 | |
158 | |
159 | Whether to recurse requests or not, e.g. on redirects, authentication and |
159 | Whether to recurse requests or not, e.g. on redirects, authentication and |
160 | other retries and so on, and how often to do so. |
160 | other retries and so on, and how often to do so. |
161 | |
161 | |
|
|
162 | Only redirects to http and https URLs are supported. While most common |
|
|
163 | redirection forms are handled entirely within this module, some require |
|
|
164 | the use of the optional L<URI> module. If it is required but missing, then |
|
|
165 | the request will fail with an error. |
|
|
166 | |
162 | =item headers => hashref |
167 | =item headers => hashref |
163 | |
168 | |
164 | The request headers to use. Currently, C<http_request> may provide its own |
169 | The request headers to use. Currently, C<http_request> may provide its own |
165 | C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and |
170 | C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and |
166 | will provide defaults at least for C<TE:>, C<Referer:> and C<User-Agent:> |
171 | will provide defaults at least for C<TE:>, C<Referer:> and C<User-Agent:> |
… | |
… | |
189 | |
194 | |
190 | C<$scheme> must be either missing or must be C<http> for HTTP. |
195 | C<$scheme> must be either missing or must be C<http> for HTTP. |
191 | |
196 | |
192 | If not specified, then the default proxy is used (see |
197 | If not specified, then the default proxy is used (see |
193 | C<AnyEvent::HTTP::set_proxy>). |
198 | C<AnyEvent::HTTP::set_proxy>). |
|
|
199 | |
|
|
200 | Currently, if your proxy requires authorization, you have to specify an |
|
|
201 | appropriate "Proxy-Authorization" header in every request. |
194 | |
202 | |
195 | =item body => $string |
203 | =item body => $string |
196 | |
204 | |
197 | The request body, usually empty. Will be sent as-is (future versions of |
205 | The request body, usually empty. Will be sent as-is (future versions of |
198 | this module might offer more options). |
206 | this module might offer more options). |
… | |
… | |
765 | |
773 | |
766 | my $uport = $uscheme eq "http" ? 80 |
774 | my $uport = $uscheme eq "http" ? 80 |
767 | : $uscheme eq "https" ? 443 |
775 | : $uscheme eq "https" ? 443 |
768 | : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); |
776 | : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); |
769 | |
777 | |
770 | $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x |
778 | $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x |
771 | or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); |
779 | or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); |
772 | |
780 | |
773 | my $uhost = lc $1; |
781 | my $uhost = lc $1; |
774 | $uport = $2 if defined $2; |
782 | $uport = $2 if defined $2; |
775 | |
783 | |
… | |
… | |
841 | # send request |
849 | # send request |
842 | $hdl->push_write ( |
850 | $hdl->push_write ( |
843 | "$method $rpath HTTP/1.1\015\012" |
851 | "$method $rpath HTTP/1.1\015\012" |
844 | . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) |
852 | . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) |
845 | . "\015\012" |
853 | . "\015\012" |
846 | . (delete $arg{body}) |
854 | . $arg{body} |
847 | ); |
855 | ); |
848 | |
856 | |
849 | # return if error occurred during push_write() |
857 | # return if error occurred during push_write() |
850 | return unless %state; |
858 | return unless %state; |
851 | |
859 | |
… | |
… | |
881 | |
889 | |
882 | %hdr = (%$hdr, @pseudo); |
890 | %hdr = (%$hdr, @pseudo); |
883 | } |
891 | } |
884 | |
892 | |
885 | # redirect handling |
893 | # redirect handling |
886 | # microsoft and other shitheads don't give a shit for following standards, |
894 | # relative uri handling forced by microsoft and other shitheads. |
887 | # try to support some common forms of broken Location headers. |
895 | # we give our best and fall back to URI if available. |
888 | if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) { |
896 | if (exists $hdr{location}) { |
|
|
897 | my $loc = $hdr{location}; |
|
|
898 | |
|
|
899 | if ($loc =~ m%^//%) { # // |
|
|
900 | $loc = "$rscheme:$loc"; |
|
|
901 | |
|
|
902 | } elsif ($loc eq "") { |
|
|
903 | $loc = $url; |
|
|
904 | |
|
|
905 | } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple" |
889 | $hdr{location} =~ s/^\.\/+//; |
906 | $loc =~ s/^\.\/+//; |
890 | |
907 | |
|
|
908 | if ($loc !~ m%^[.?#]%) { |
891 | my $url = "$rscheme://$uhost:$uport"; |
909 | my $prefix = "$rscheme://$uhost:$uport"; |
892 | |
910 | |
893 | unless ($hdr{location} =~ s/^\///) { |
911 | unless ($loc =~ s/^\///) { |
894 | $url .= $upath; |
912 | $prefix .= $upath; |
895 | $url =~ s/\/[^\/]*$//; |
913 | $prefix =~ s/\/[^\/]*$//; |
|
|
914 | } |
|
|
915 | |
|
|
916 | $loc = "$prefix/$loc"; |
|
|
917 | |
|
|
918 | } elsif (eval { require URI }) { # uri |
|
|
919 | $loc = URI->new_abs ($loc, $url)->as_string; |
|
|
920 | |
|
|
921 | } else { |
|
|
922 | return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" }; |
|
|
923 | #$hdr{Status} = 599; |
|
|
924 | #$hdr{Reason} = "Unparsable Redirect (URI module missing)"; |
|
|
925 | #$recurse = 0; |
|
|
926 | } |
896 | } |
927 | } |
897 | |
928 | |
898 | $hdr{location} = "$url/$hdr{location}"; |
929 | $hdr{location} = $loc; |
899 | } |
930 | } |
900 | |
931 | |
901 | my $redirect; |
932 | my $redirect; |
902 | |
933 | |
903 | if ($recurse) { |
934 | if ($recurse) { |
… | |
… | |
905 | |
936 | |
906 | # industry standard is to redirect POST as GET for |
937 | # industry standard is to redirect POST as GET for |
907 | # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1. |
938 | # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1. |
908 | # also, the UA should ask the user for 301 and 307 and POST, |
939 | # also, the UA should ask the user for 301 and 307 and POST, |
909 | # industry standard seems to be to simply follow. |
940 | # industry standard seems to be to simply follow. |
910 | # we go with the industry standard. |
941 | # we go with the industry standard. 308 is defined |
|
|
942 | # by rfc7538 |
911 | if ($status == 301 or $status == 302 or $status == 303) { |
943 | if ($status == 301 or $status == 302 or $status == 303) { |
|
|
944 | $redirect = 1; |
912 | # HTTP/1.1 is unclear on how to mutate the method |
945 | # HTTP/1.1 is unclear on how to mutate the method |
913 | $method = "GET" unless $method eq "HEAD"; |
946 | unless ($method eq "HEAD") { |
914 | $redirect = 1; |
947 | $method = "GET"; |
|
|
948 | delete $arg{body}; |
|
|
949 | } |
915 | } elsif ($status == 307) { |
950 | } elsif ($status == 307 or $status == 308) { |
916 | $redirect = 1; |
951 | $redirect = 1; |
917 | } |
952 | } |
918 | } |
953 | } |
919 | |
954 | |
920 | my $finish = sub { # ($data, $err_status, $err_reason[, $persistent]) |
955 | my $finish = sub { # ($data, $err_status, $err_reason[, $persistent]) |
… | |
… | |
1088 | _destroy_state %state; |
1123 | _destroy_state %state; |
1089 | |
1124 | |
1090 | %state = (); |
1125 | %state = (); |
1091 | $state{recurse} = |
1126 | $state{recurse} = |
1092 | http_request ( |
1127 | http_request ( |
1093 | $method => $url, |
1128 | $method => $url, |
1094 | %arg, |
1129 | %arg, |
1095 | recurse => $recurse - 1, |
1130 | recurse => $recurse - 1, |
1096 | keepalive => 0, |
1131 | persistent => 0, |
1097 | sub { |
1132 | sub { |
1098 | %state = (); |
1133 | %state = (); |
1099 | &$cb |
1134 | &$cb |
1100 | } |
1135 | } |
1101 | ); |
1136 | ); |
… | |
… | |
1147 | |
1182 | |
1148 | # now handle proxy-CONNECT method |
1183 | # now handle proxy-CONNECT method |
1149 | if ($proxy && $uscheme eq "https") { |
1184 | if ($proxy && $uscheme eq "https") { |
1150 | # oh dear, we have to wrap it into a connect request |
1185 | # oh dear, we have to wrap it into a connect request |
1151 | |
1186 | |
|
|
1187 | my $auth = exists $hdr{"proxy-authorization"} |
|
|
1188 | ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012" |
|
|
1189 | : ""; |
|
|
1190 | |
1152 | # maybe re-use $uauthority with patched port? |
1191 | # maybe re-use $uauthority with patched port? |
1153 | $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012"); |
1192 | $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012"); |
1154 | $state{handle}->push_read (line => $qr_nlnl, sub { |
1193 | $state{handle}->push_read (line => $qr_nlnl, sub { |
1155 | $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix |
1194 | $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix |
1156 | or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }; |
1195 | or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }; |
1157 | |
1196 | |
1158 | if ($2 == 200) { |
1197 | if ($2 == 200) { |
… | |
… | |
1161 | } else { |
1200 | } else { |
1162 | _error %state, $cb, { @pseudo, Status => $2, Reason => $3 }; |
1201 | _error %state, $cb, { @pseudo, Status => $2, Reason => $3 }; |
1163 | } |
1202 | } |
1164 | }); |
1203 | }); |
1165 | } else { |
1204 | } else { |
|
|
1205 | delete $hdr{"proxy-authorization"} unless $proxy; |
|
|
1206 | |
1166 | $handle_actual_request->(); |
1207 | $handle_actual_request->(); |
1167 | } |
1208 | } |
1168 | }; |
1209 | }; |
1169 | |
1210 | |
1170 | _get_slot $uhost, sub { |
1211 | _get_slot $uhost, sub { |
… | |
… | |
1248 | save cookies to disk, and you should call this function after loading them |
1289 | save cookies to disk, and you should call this function after loading them |
1249 | again. If you have a long-running program you can additionally call this |
1290 | again. If you have a long-running program you can additionally call this |
1250 | function from time to time. |
1291 | function from time to time. |
1251 | |
1292 | |
1252 | A cookie jar is initially an empty hash-reference that is managed by this |
1293 | A cookie jar is initially an empty hash-reference that is managed by this |
1253 | module. It's format is subject to change, but currently it is like this: |
1294 | module. Its format is subject to change, but currently it is as follows: |
1254 | |
1295 | |
1255 | The key C<version> has to contain C<1>, otherwise the hash gets |
1296 | The key C<version> has to contain C<1>, otherwise the hash gets |
1256 | emptied. All other keys are hostnames or IP addresses pointing to |
1297 | emptied. All other keys are hostnames or IP addresses pointing to |
1257 | hash-references. The key for these inner hash references is the |
1298 | hash-references. The key for these inner hash references is the |
1258 | server path for which this cookie is meant, and the values are again |
1299 | server path for which this cookie is meant, and the values are again |
… | |
… | |
1303 | C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). |
1344 | C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). |
1304 | |
1345 | |
1305 | =item $AnyEvent::HTTP::MAX_PER_HOST |
1346 | =item $AnyEvent::HTTP::MAX_PER_HOST |
1306 | |
1347 | |
1307 | The maximum number of concurrent connections to the same host (identified |
1348 | The maximum number of concurrent connections to the same host (identified |
1308 | by the hostname). If the limit is exceeded, then the additional requests |
1349 | by the hostname). If the limit is exceeded, then additional requests |
1309 | are queued until previous connections are closed. Both persistent and |
1350 | are queued until previous connections are closed. Both persistent and |
1310 | non-persistent connections are counted in this limit. |
1351 | non-persistent connections are counted in this limit. |
1311 | |
1352 | |
1312 | The default value for this is C<4>, and it is highly advisable to not |
1353 | The default value for this is C<4>, and it is highly advisable to not |
1313 | increase it much. |
1354 | increase it much. |