| 1 |
=head1 NAME |
| 2 |
|
| 3 |
AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client |
| 4 |
|
| 5 |
=head1 SYNOPSIS |
| 6 |
|
| 7 |
use AnyEvent::HTTP; |
| 8 |
|
| 9 |
http_get "http://www.nethype.de/", sub { |
| 10 |
my ($body, $hdr) = @_; |
| 11 |
print "$hdr->{URL} Status: $hdr->{Status}\n"; |
| 12 |
print $body; |
| 13 |
}; |
| 14 |
|
| 15 |
# ... do something else here |
| 16 |
|
| 17 |
=head1 DESCRIPTION |
| 18 |
|
| 19 |
This module is an L<AnyEvent> user, you need to make sure that you use and |
| 20 |
run a supported event loop. |
| 21 |
|
| 22 |
This module implements a simple, stateless and non-blocking HTTP |
| 23 |
client. It supports GET, POST and other request methods, cookies and more, |
| 24 |
all on a very low level. It can follow redirects, supports proxies, and |
| 25 |
automatically limits the number of connections to the values specified in |
| 26 |
the RFC. |
| 27 |
|
| 28 |
It should generally be a "good client" that is enough for most HTTP |
| 29 |
tasks. Simple tasks should be simple, but complex tasks should still be |
| 30 |
possible as the user retains control over request and response headers. |
| 31 |
|
| 32 |
The caller is responsible for authentication management, cookies (if |
| 33 |
the simplistic implementation in this module doesn't suffice), referer |
| 34 |
and other high-level protocol details for which this module offers only |
| 35 |
limited support. |
| 36 |
|
| 37 |
=head2 METHODS |
| 38 |
|
| 39 |
=over 4 |
| 40 |
|
| 41 |
=cut |
| 42 |
|
| 43 |
package AnyEvent::HTTP; |
| 44 |
|
| 45 |
use common::sense; |
| 46 |
|
| 47 |
use Errno (); |
| 48 |
|
| 49 |
use AnyEvent 5.0 (); |
| 50 |
use AnyEvent::Util (); |
| 51 |
use AnyEvent::Handle (); |
| 52 |
|
| 53 |
use base Exporter::; |
| 54 |
|
| 55 |
our $VERSION = 2.25; |
| 56 |
|
| 57 |
our @EXPORT = qw(http_get http_post http_head http_request); |
| 58 |
|
| 59 |
our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; |
| 60 |
our $MAX_RECURSE = 10; |
| 61 |
our $PERSISTENT_TIMEOUT = 3; |
| 62 |
our $TIMEOUT = 300; |
| 63 |
our $MAX_PER_HOST = 4; # changing this is evil |
| 64 |
|
| 65 |
our $PROXY; |
| 66 |
our $ACTIVE = 0; |
| 67 |
|
| 68 |
my %KA_CACHE; # indexed by uhost currently, points to [$handle...] array |
| 69 |
my %CO_SLOT; # number of open connections, and wait queue, per host |
| 70 |
|
| 71 |
=item http_get $url, key => value..., $cb->($data, $headers) |
| 72 |
|
| 73 |
Executes an HTTP-GET request. See the http_request function for details on |
| 74 |
additional parameters and the return value. |
| 75 |
|
| 76 |
=item http_head $url, key => value..., $cb->($data, $headers) |
| 77 |
|
| 78 |
Executes an HTTP-HEAD request. See the http_request function for details |
| 79 |
on additional parameters and the return value. |
| 80 |
|
| 81 |
=item http_post $url, $body, key => value..., $cb->($data, $headers) |
| 82 |
|
| 83 |
Executes an HTTP-POST request with a request body of C<$body>. See the |
| 84 |
http_request function for details on additional parameters and the return |
| 85 |
value. |
| 86 |
|
| 87 |
=item http_request $method => $url, key => value..., $cb->($data, $headers) |
| 88 |
|
| 89 |
Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL |
| 90 |
must be an absolute http or https URL. |
| 91 |
|
| 92 |
When called in void context, nothing is returned. In other contexts, |
| 93 |
C<http_request> returns a "cancellation guard" - you have to keep the |
| 94 |
object at least alive until the callback get called. If the object gets |
| 95 |
destroyed before the callback is called, the request will be cancelled. |
| 96 |
|
| 97 |
The callback will be called with the response body data as first argument |
| 98 |
(or C<undef> if an error occurred), and a hash-ref with response headers |
| 99 |
(and trailers) as second argument. |
| 100 |
|
| 101 |
All the headers in that hash are lowercased. In addition to the response |
| 102 |
headers, the "pseudo-headers" (uppercase to avoid clashing with possible |
| 103 |
response headers) C<HTTPVersion>, C<Status> and C<Reason> contain the |
| 104 |
three parts of the HTTP Status-Line of the same name. If an error occurs |
| 105 |
during the body phase of a request, then the original C<Status> and |
| 106 |
C<Reason> values from the header are available as C<OrigStatus> and |
| 107 |
C<OrigReason>. |
| 108 |
|
| 109 |
The pseudo-header C<URL> contains the actual URL (which can differ from |
| 110 |
the requested URL when following redirects - for example, you might get |
| 111 |
an error that your URL scheme is not supported even though your URL is a |
| 112 |
valid http URL because it redirected to an ftp URL, in which case you can |
| 113 |
look at the URL pseudo header). |
| 114 |
|
| 115 |
The pseudo-header C<Redirect> only exists when the request was a result |
| 116 |
of an internal redirect. In that case it is an array reference with |
| 117 |
the C<($data, $headers)> from the redirect response. Note that this |
| 118 |
response could in turn be the result of a redirect itself, and C<< |
| 119 |
$headers->{Redirect}[1]{Redirect} >> will then contain the original |
| 120 |
response, and so on. |
| 121 |
|
| 122 |
If the server sends a header multiple times, then their contents will be |
| 123 |
joined together with a comma (C<,>), as per the HTTP spec. |
| 124 |
|
| 125 |
If an internal error occurs, such as not being able to resolve a hostname, |
| 126 |
then C<$data> will be C<undef>, C<< $headers->{Status} >> will be |
| 127 |
C<590>-C<599> and the C<Reason> pseudo-header will contain an error |
| 128 |
message. Currently the following status codes are used: |
| 129 |
|
| 130 |
=over 4 |
| 131 |
|
| 132 |
=item 595 - errors during connection establishment, proxy handshake. |
| 133 |
|
| 134 |
=item 596 - errors during TLS negotiation, request sending and header processing. |
| 135 |
|
| 136 |
=item 597 - errors during body receiving or processing. |
| 137 |
|
| 138 |
=item 598 - user aborted request via C<on_header> or C<on_body>. |
| 139 |
|
| 140 |
=item 599 - other, usually nonretryable, errors (garbled URL etc.). |
| 141 |
|
| 142 |
=back |
| 143 |
|
| 144 |
A typical callback might look like this: |
| 145 |
|
| 146 |
sub { |
| 147 |
my ($body, $hdr) = @_; |
| 148 |
|
| 149 |
if ($hdr->{Status} =~ /^2/) { |
| 150 |
... everything should be ok |
| 151 |
} else { |
| 152 |
print "error, $hdr->{Status} $hdr->{Reason}\n"; |
| 153 |
} |
| 154 |
} |
| 155 |
|
| 156 |
Additional parameters are key-value pairs, and are fully optional. They |
| 157 |
include: |
| 158 |
|
| 159 |
=over 4 |
| 160 |
|
| 161 |
=item recurse => $count (default: $MAX_RECURSE) |
| 162 |
|
| 163 |
Whether to recurse requests or not, e.g. on redirects, authentication and |
| 164 |
other retries and so on, and how often to do so. |
| 165 |
|
| 166 |
Only redirects to http and https URLs are supported. While most common |
| 167 |
redirection forms are handled entirely within this module, some require |
| 168 |
the use of the optional L<URI> module. If it is required but missing, then |
| 169 |
the request will fail with an error. |
| 170 |
|
| 171 |
=item headers => hashref |
| 172 |
|
| 173 |
The request headers to use. Currently, C<http_request> may provide its own |
| 174 |
C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and |
| 175 |
will provide defaults at least for C<TE:>, C<Referer:> and C<User-Agent:> |
| 176 |
(this can be suppressed by using C<undef> for these headers in which case |
| 177 |
they won't be sent at all). |
| 178 |
|
| 179 |
You really should provide your own C<User-Agent:> header value that is |
| 180 |
appropriate for your program - I wouldn't be surprised if the default |
| 181 |
AnyEvent string gets blocked by webservers sooner or later. |
| 182 |
|
| 183 |
Also, make sure that your headers names and values do not contain any |
| 184 |
embedded newlines. |
| 185 |
|
| 186 |
=item timeout => $seconds |
| 187 |
|
| 188 |
The time-out to use for various stages - each connect attempt will reset |
| 189 |
the timeout, as will read or write activity, i.e. this is not an overall |
| 190 |
timeout. |
| 191 |
|
| 192 |
Default timeout is 5 minutes. |
| 193 |
|
| 194 |
=item proxy => [$host, $port[, $scheme]] or undef |
| 195 |
|
| 196 |
Use the given http proxy for all requests, or no proxy if C<undef> is |
| 197 |
used. |
| 198 |
|
| 199 |
C<$scheme> must be either missing or must be C<http> for HTTP. |
| 200 |
|
| 201 |
If not specified, then the default proxy is used (see |
| 202 |
C<AnyEvent::HTTP::set_proxy>). |
| 203 |
|
| 204 |
Currently, if your proxy requires authorization, you have to specify an |
| 205 |
appropriate "Proxy-Authorization" header in every request. |
| 206 |
|
| 207 |
Note that this module will prefer an existing persistent connection, |
| 208 |
even if that connection was made using another proxy. If you need to |
| 209 |
ensure that a new connection is made in this case, you can either force |
| 210 |
C<persistent> to false or e.g. use the proxy address in your C<sessionid>. |
| 211 |
|
| 212 |
=item body => $string |
| 213 |
|
| 214 |
The request body, usually empty. Will be sent as-is (future versions of |
| 215 |
this module might offer more options). |
| 216 |
|
| 217 |
=item cookie_jar => $hash_ref |
| 218 |
|
| 219 |
Passing this parameter enables (simplified) cookie-processing, loosely |
| 220 |
based on the original netscape specification. |
| 221 |
|
| 222 |
The C<$hash_ref> must be an (initially empty) hash reference which |
| 223 |
will get updated automatically. It is possible to save the cookie jar |
| 224 |
to persistent storage with something like JSON or Storable - see the |
| 225 |
C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove |
| 226 |
expired or session-only cookies, and also for documentation on the format |
| 227 |
of the cookie jar. |
| 228 |
|
| 229 |
Note that this cookie implementation is not meant to be complete. If |
| 230 |
you want complete cookie management you have to do that on your |
| 231 |
own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites |
| 232 |
working. Cookies are a privacy disaster, do not use them unless required |
| 233 |
to. |
| 234 |
|
| 235 |
When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:> |
| 236 |
headers will be set and handled by this module, otherwise they will be |
| 237 |
left untouched. |
| 238 |
|
| 239 |
=item tls_ctx => $scheme | $tls_ctx |
| 240 |
|
| 241 |
Specifies the AnyEvent::TLS context to be used for https connections. This |
| 242 |
parameter follows the same rules as the C<tls_ctx> parameter to |
| 243 |
L<AnyEvent::Handle>, but additionally, the two strings C<low> or |
| 244 |
C<high> can be specified, which give you a predefined low-security (no |
| 245 |
verification, highest compatibility) and high-security (CA and common-name |
| 246 |
verification) TLS context. |
| 247 |
|
| 248 |
The default for this option is C<low>, which could be interpreted as "give |
| 249 |
me the page, no matter what". |
| 250 |
|
| 251 |
See also the C<sessionid> parameter. |
| 252 |
|
| 253 |
=item sessionid => $string |
| 254 |
|
| 255 |
The module might reuse connections to the same host internally (regardless |
| 256 |
of other settings, such as C<tcp_connect> or C<proxy>). Sometimes (e.g. |
| 257 |
when using TLS or a specfic proxy), you do not want to reuse connections |
| 258 |
from other sessions. This can be achieved by setting this parameter to |
| 259 |
some unique ID (such as the address of an object storing your state data |
| 260 |
or the TLS context, or the proxy IP) - only connections using the same |
| 261 |
unique ID will be reused. |
| 262 |
|
| 263 |
=item on_prepare => $callback->($fh) |
| 264 |
|
| 265 |
In rare cases you need to "tune" the socket before it is used to |
| 266 |
connect (for example, to bind it on a given IP address). This parameter |
| 267 |
overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect> |
| 268 |
and behaves exactly the same way (e.g. it has to provide a |
| 269 |
timeout). See the description for the C<$prepare_cb> argument of |
| 270 |
C<AnyEvent::Socket::tcp_connect> for details. |
| 271 |
|
| 272 |
=item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb) |
| 273 |
|
| 274 |
In even rarer cases you want total control over how AnyEvent::HTTP |
| 275 |
establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect> |
| 276 |
to do this, but you can provide your own C<tcp_connect> function - |
| 277 |
obviously, it has to follow the same calling conventions, except that it |
| 278 |
may always return a connection guard object. |
| 279 |
|
| 280 |
The connections made by this hook will be treated as equivalent to |
| 281 |
connections made the built-in way, specifically, they will be put into |
| 282 |
and taken from the persistent connection cache. If your C<$tcp_connect> |
| 283 |
function is incompatible with this kind of re-use, consider switching off |
| 284 |
C<persistent> connections and/or providing a C<sessionid> identifier. |
| 285 |
|
| 286 |
There are probably lots of weird uses for this function, starting from |
| 287 |
tracing the hosts C<http_request> actually tries to connect, to (inexact |
| 288 |
but fast) host => IP address caching or even socks protocol support. |
| 289 |
|
| 290 |
=item on_header => $callback->($headers) |
| 291 |
|
| 292 |
When specified, this callback will be called with the header hash as soon |
| 293 |
as headers have been successfully received from the remote server (not on |
| 294 |
locally-generated errors). |
| 295 |
|
| 296 |
It has to return either true (in which case AnyEvent::HTTP will continue), |
| 297 |
or false, in which case AnyEvent::HTTP will cancel the download (and call |
| 298 |
the finish callback with an error code of C<598>). |
| 299 |
|
| 300 |
This callback is useful, among other things, to quickly reject unwanted |
| 301 |
content, which, if it is supposed to be rare, can be faster than first |
| 302 |
doing a C<HEAD> request. |
| 303 |
|
| 304 |
The downside is that cancelling the request makes it impossible to re-use |
| 305 |
the connection. Also, the C<on_header> callback will not receive any |
| 306 |
trailer (headers sent after the response body). |
| 307 |
|
| 308 |
Example: cancel the request unless the content-type is "text/html". |
| 309 |
|
| 310 |
on_header => sub { |
| 311 |
$_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/ |
| 312 |
}, |
| 313 |
|
| 314 |
=item on_body => $callback->($partial_body, $headers) |
| 315 |
|
| 316 |
When specified, all body data will be passed to this callback instead of |
| 317 |
to the completion callback. The completion callback will get the empty |
| 318 |
string instead of the body data. |
| 319 |
|
| 320 |
It has to return either true (in which case AnyEvent::HTTP will continue), |
| 321 |
or false, in which case AnyEvent::HTTP will cancel the download (and call |
| 322 |
the completion callback with an error code of C<598>). |
| 323 |
|
| 324 |
The downside to cancelling the request is that it makes it impossible to |
| 325 |
re-use the connection. |
| 326 |
|
| 327 |
This callback is useful when the data is too large to be held in memory |
| 328 |
(so the callback writes it to a file) or when only some information should |
| 329 |
be extracted, or when the body should be processed incrementally. |
| 330 |
|
| 331 |
It is usually preferred over doing your own body handling via |
| 332 |
C<want_body_handle>, but in case of streaming APIs, where HTTP is |
| 333 |
only used to create a connection, C<want_body_handle> is the better |
| 334 |
alternative, as it allows you to install your own event handler, reducing |
| 335 |
resource usage. |
| 336 |
|
| 337 |
=item want_body_handle => $enable |
| 338 |
|
| 339 |
When enabled (default is disabled), the behaviour of AnyEvent::HTTP |
| 340 |
changes considerably: after parsing the headers, and instead of |
| 341 |
downloading the body (if any), the completion callback will be |
| 342 |
called. Instead of the C<$body> argument containing the body data, the |
| 343 |
callback will receive the L<AnyEvent::Handle> object associated with the |
| 344 |
connection. In error cases, C<undef> will be passed. When there is no body |
| 345 |
(e.g. status C<304>), the empty string will be passed. |
| 346 |
|
| 347 |
The handle object might or might not be in TLS mode, might be connected |
| 348 |
to a proxy, be a persistent connection, use chunked transfer encoding |
| 349 |
etc., and configured in unspecified ways. The user is responsible for this |
| 350 |
handle (it will not be used by this module anymore). |
| 351 |
|
| 352 |
This is useful with some push-type services, where, after the initial |
| 353 |
headers, an interactive protocol is used (typical example would be the |
| 354 |
push-style twitter API which starts a JSON/XML stream). |
| 355 |
|
| 356 |
If you think you need this, first have a look at C<on_body>, to see if |
| 357 |
that doesn't solve your problem in a better way. |
| 358 |
|
| 359 |
=item persistent => $boolean |
| 360 |
|
| 361 |
Try to create/reuse a persistent connection. When this flag is set |
| 362 |
(default: true for idempotent requests, false for all others), then |
| 363 |
C<http_request> tries to re-use an existing (previously-created) |
| 364 |
persistent connection to same host (i.e. identical URL scheme, hostname, |
| 365 |
port and sessionid) and, failing that, tries to create a new one. |
| 366 |
|
| 367 |
Requests failing in certain ways will be automatically retried once, which |
| 368 |
is dangerous for non-idempotent requests, which is why it defaults to off |
| 369 |
for them. The reason for this is because the bozos who designed HTTP/1.1 |
| 370 |
made it impossible to distinguish between a fatal error and a normal |
| 371 |
connection timeout, so you never know whether there was a problem with |
| 372 |
your request or not. |
| 373 |
|
| 374 |
When reusing an existent connection, many parameters (such as TLS context) |
| 375 |
will be ignored. See the C<sessionid> parameter for a workaround. |
| 376 |
|
| 377 |
=item keepalive => $boolean |
| 378 |
|
| 379 |
Only used when C<persistent> is also true. This parameter decides whether |
| 380 |
C<http_request> tries to handshake a HTTP/1.0-style keep-alive connection |
| 381 |
(as opposed to only a HTTP/1.1 persistent connection). |
| 382 |
|
| 383 |
The default is true, except when using a proxy, in which case it defaults |
| 384 |
to false, as HTTP/1.0 proxies cannot support this in a meaningful way. |
| 385 |
|
| 386 |
=item handle_params => { key => value ... } |
| 387 |
|
| 388 |
The key-value pairs in this hash will be passed to any L<AnyEvent::Handle> |
| 389 |
constructor that is called - not all requests will create a handle, and |
| 390 |
sometimes more than one is created, so this parameter is only good for |
| 391 |
setting hints. |
| 392 |
|
| 393 |
Example: set the maximum read size to 4096, to potentially conserve memory |
| 394 |
at the cost of speed. |
| 395 |
|
| 396 |
handle_params => { |
| 397 |
max_read_size => 4096, |
| 398 |
}, |
| 399 |
|
| 400 |
=back |
| 401 |
|
| 402 |
Example: do a simple HTTP GET request for http://www.nethype.de/ and print |
| 403 |
the response body. |
| 404 |
|
| 405 |
http_request GET => "http://www.nethype.de/", sub { |
| 406 |
my ($body, $hdr) = @_; |
| 407 |
print "$body\n"; |
| 408 |
}; |
| 409 |
|
| 410 |
Example: do a HTTP HEAD request on https://www.google.com/, use a |
| 411 |
timeout of 30 seconds. |
| 412 |
|
| 413 |
http_request |
| 414 |
HEAD => "https://www.google.com", |
| 415 |
headers => { "user-agent" => "MySearchClient 1.0" }, |
| 416 |
timeout => 30, |
| 417 |
sub { |
| 418 |
my ($body, $hdr) = @_; |
| 419 |
use Data::Dumper; |
| 420 |
print Dumper $hdr; |
| 421 |
} |
| 422 |
; |
| 423 |
|
| 424 |
Example: do another simple HTTP GET request, but immediately try to |
| 425 |
cancel it. |
| 426 |
|
| 427 |
my $request = http_request GET => "http://www.nethype.de/", sub { |
| 428 |
my ($body, $hdr) = @_; |
| 429 |
print "$body\n"; |
| 430 |
}; |
| 431 |
|
| 432 |
undef $request; |
| 433 |
|
| 434 |
=cut |
| 435 |
|
| 436 |
############################################################################# |
| 437 |
# wait queue/slots |
| 438 |
|
| 439 |
sub _slot_schedule; |
| 440 |
sub _slot_schedule($) { |
| 441 |
my $host = shift; |
| 442 |
|
| 443 |
while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) { |
| 444 |
if (my $cb = shift @{ $CO_SLOT{$host}[1] }) { |
| 445 |
# somebody wants that slot |
| 446 |
++$CO_SLOT{$host}[0]; |
| 447 |
++$ACTIVE; |
| 448 |
|
| 449 |
$cb->(AnyEvent::Util::guard { |
| 450 |
--$ACTIVE; |
| 451 |
--$CO_SLOT{$host}[0]; |
| 452 |
_slot_schedule $host; |
| 453 |
}); |
| 454 |
} else { |
| 455 |
# nobody wants the slot, maybe we can forget about it |
| 456 |
delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0]; |
| 457 |
last; |
| 458 |
} |
| 459 |
} |
| 460 |
} |
| 461 |
|
| 462 |
# wait for a free slot on host, call callback |
| 463 |
sub _get_slot($$) { |
| 464 |
push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; |
| 465 |
|
| 466 |
_slot_schedule $_[0]; |
| 467 |
} |
| 468 |
|
| 469 |
############################################################################# |
| 470 |
# cookie handling |
| 471 |
|
| 472 |
# expire cookies |
| 473 |
sub cookie_jar_expire($;$) { |
| 474 |
my ($jar, $session_end) = @_; |
| 475 |
|
| 476 |
%$jar = () if $jar->{version} != 2; |
| 477 |
|
| 478 |
my $anow = AE::now; |
| 479 |
|
| 480 |
while (my ($chost, $paths) = each %$jar) { |
| 481 |
next unless ref $paths; |
| 482 |
|
| 483 |
while (my ($cpath, $cookies) = each %$paths) { |
| 484 |
while (my ($cookie, $kv) = each %$cookies) { |
| 485 |
if (exists $kv->{_expires}) { |
| 486 |
delete $cookies->{$cookie} |
| 487 |
if $anow > $kv->{_expires}; |
| 488 |
} elsif ($session_end) { |
| 489 |
delete $cookies->{$cookie}; |
| 490 |
} |
| 491 |
} |
| 492 |
|
| 493 |
delete $paths->{$cpath} |
| 494 |
unless %$cookies; |
| 495 |
} |
| 496 |
|
| 497 |
delete $jar->{$chost} |
| 498 |
unless %$paths; |
| 499 |
} |
| 500 |
} |
| 501 |
|
| 502 |
# extract cookies from jar |
| 503 |
sub cookie_jar_extract($$$$) { |
| 504 |
my ($jar, $scheme, $host, $path) = @_; |
| 505 |
|
| 506 |
%$jar = () if $jar->{version} != 2; |
| 507 |
|
| 508 |
$host = AnyEvent::Util::idn_to_ascii $host |
| 509 |
if $host =~ /[^\x00-\x7f]/; |
| 510 |
|
| 511 |
my @cookies; |
| 512 |
|
| 513 |
while (my ($chost, $paths) = each %$jar) { |
| 514 |
next unless ref $paths; |
| 515 |
|
| 516 |
# exact match or suffix including . match |
| 517 |
$chost eq $host or ".$chost" eq substr $host, -1 - length $chost |
| 518 |
or next; |
| 519 |
|
| 520 |
while (my ($cpath, $cookies) = each %$paths) { |
| 521 |
next unless $cpath eq substr $path, 0, length $cpath; |
| 522 |
|
| 523 |
while (my ($cookie, $kv) = each %$cookies) { |
| 524 |
next if $scheme ne "https" && exists $kv->{secure}; |
| 525 |
|
| 526 |
if (exists $kv->{_expires} and AE::now > $kv->{_expires}) { |
| 527 |
delete $cookies->{$cookie}; |
| 528 |
next; |
| 529 |
} |
| 530 |
|
| 531 |
my $value = $kv->{value}; |
| 532 |
|
| 533 |
if ($value =~ /[=;,[:space:]]/) { |
| 534 |
$value =~ s/([\\"])/\\$1/g; |
| 535 |
$value = "\"$value\""; |
| 536 |
} |
| 537 |
|
| 538 |
push @cookies, "$cookie=$value"; |
| 539 |
} |
| 540 |
} |
| 541 |
} |
| 542 |
|
| 543 |
\@cookies |
| 544 |
} |
| 545 |
|
| 546 |
# parse set_cookie header into jar |
| 547 |
sub cookie_jar_set_cookie($$$$) { |
| 548 |
my ($jar, $set_cookie, $host, $date) = @_; |
| 549 |
|
| 550 |
%$jar = () if $jar->{version} != 2; |
| 551 |
|
| 552 |
my $anow = int AE::now; |
| 553 |
my $snow; # server-now |
| 554 |
|
| 555 |
for ($set_cookie) { |
| 556 |
# parse NAME=VALUE |
| 557 |
my @kv; |
| 558 |
|
| 559 |
# expires is not http-compliant in the original cookie-spec, |
| 560 |
# we support the official date format and some extensions |
| 561 |
while ( |
| 562 |
m{ |
| 563 |
\G\s* |
| 564 |
(?: |
| 565 |
expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) |
| 566 |
| ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )? |
| 567 |
) |
| 568 |
}gcxsi |
| 569 |
) { |
| 570 |
my $name = $2; |
| 571 |
my $value = $4; |
| 572 |
|
| 573 |
if (defined $1) { |
| 574 |
# expires |
| 575 |
$name = "expires"; |
| 576 |
$value = $1; |
| 577 |
} elsif (defined $3) { |
| 578 |
# quoted |
| 579 |
$value = $3; |
| 580 |
$value =~ s/\\(.)/$1/gs; |
| 581 |
} |
| 582 |
|
| 583 |
push @kv, @kv ? lc $name : $name, $value; |
| 584 |
|
| 585 |
last unless /\G\s*;/gc; |
| 586 |
} |
| 587 |
|
| 588 |
last unless @kv; |
| 589 |
|
| 590 |
my $name = shift @kv; |
| 591 |
my %kv = (value => shift @kv, @kv); |
| 592 |
|
| 593 |
if (exists $kv{"max-age"}) { |
| 594 |
$kv{_expires} = $anow + delete $kv{"max-age"}; |
| 595 |
} elsif (exists $kv{expires}) { |
| 596 |
$snow ||= parse_date ($date) || $anow; |
| 597 |
$kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow); |
| 598 |
} else { |
| 599 |
delete $kv{_expires}; |
| 600 |
} |
| 601 |
|
| 602 |
my $cdom; |
| 603 |
my $cpath = (delete $kv{path}) || "/"; |
| 604 |
|
| 605 |
if (exists $kv{domain}) { |
| 606 |
$cdom = $kv{domain}; |
| 607 |
|
| 608 |
$cdom =~ s/^\.?/./; # make sure it starts with a "." |
| 609 |
|
| 610 |
next if $cdom =~ /\.$/; |
| 611 |
|
| 612 |
# this is not rfc-like and not netscape-like. go figure. |
| 613 |
my $ndots = $cdom =~ y/.//; |
| 614 |
next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2); |
| 615 |
|
| 616 |
$cdom = substr $cdom, 1; # remove initial . |
| 617 |
} else { |
| 618 |
$cdom = $host; |
| 619 |
} |
| 620 |
|
| 621 |
# store it |
| 622 |
$jar->{version} = 2; |
| 623 |
$jar->{lc $cdom}{$cpath}{$name} = \%kv; |
| 624 |
|
| 625 |
redo if /\G\s*,/gc; |
| 626 |
} |
| 627 |
} |
| 628 |
|
| 629 |
############################################################################# |
| 630 |
# keepalive/persistent connection cache |
| 631 |
|
| 632 |
# fetch a connection from the keepalive cache |
| 633 |
sub ka_fetch($) { |
| 634 |
my $ka_key = shift; |
| 635 |
|
| 636 |
my $hdl = pop @{ $KA_CACHE{$ka_key} }; # currently we reuse the MOST RECENTLY USED connection |
| 637 |
delete $KA_CACHE{$ka_key} |
| 638 |
unless @{ $KA_CACHE{$ka_key} }; |
| 639 |
|
| 640 |
$hdl |
| 641 |
} |
| 642 |
|
| 643 |
sub ka_store($$) { |
| 644 |
my ($ka_key, $hdl) = @_; |
| 645 |
|
| 646 |
my $kaa = $KA_CACHE{$ka_key} ||= []; |
| 647 |
|
| 648 |
my $destroy = sub { |
| 649 |
my @ka = grep $_ != $hdl, @{ $KA_CACHE{$ka_key} }; |
| 650 |
|
| 651 |
$hdl->destroy; |
| 652 |
|
| 653 |
@ka |
| 654 |
? $KA_CACHE{$ka_key} = \@ka |
| 655 |
: delete $KA_CACHE{$ka_key}; |
| 656 |
}; |
| 657 |
|
| 658 |
# on error etc., destroy |
| 659 |
$hdl->on_error ($destroy); |
| 660 |
$hdl->on_eof ($destroy); |
| 661 |
$hdl->on_read ($destroy); |
| 662 |
$hdl->timeout ($PERSISTENT_TIMEOUT); |
| 663 |
|
| 664 |
push @$kaa, $hdl; |
| 665 |
shift @$kaa while @$kaa > $MAX_PER_HOST; |
| 666 |
} |
| 667 |
|
| 668 |
############################################################################# |
| 669 |
# utilities |
| 670 |
|
| 671 |
# continue to parse $_ for headers and place them into the arg |
| 672 |
sub _parse_hdr() { |
| 673 |
my %hdr; |
| 674 |
|
| 675 |
# things seen, not parsed: |
| 676 |
# p3pP="NON CUR OTPi OUR NOR UNI" |
| 677 |
|
| 678 |
$hdr{lc $1} .= ",$2" |
| 679 |
while /\G |
| 680 |
([^:\000-\037]*): |
| 681 |
[\011\040]* |
| 682 |
((?: [^\012]+ | \012[\011\040] )*) |
| 683 |
\012 |
| 684 |
/gxc; |
| 685 |
|
| 686 |
/\G$/ |
| 687 |
or return; |
| 688 |
|
| 689 |
# remove the "," prefix we added to all headers above |
| 690 |
substr $_, 0, 1, "" |
| 691 |
for values %hdr; |
| 692 |
|
| 693 |
\%hdr |
| 694 |
} |
| 695 |
|
| 696 |
############################################################################# |
| 697 |
# http_get |
| 698 |
|
| 699 |
our $qr_nlnl = qr{(?<![^\012])\015?\012}; |
| 700 |
|
| 701 |
our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 }; |
| 702 |
our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" }; |
| 703 |
|
| 704 |
# maybe it should just become a normal object :/ |
| 705 |
|
| 706 |
sub _destroy_state(\%) { |
| 707 |
my ($state) = @_; |
| 708 |
|
| 709 |
$state->{handle}->destroy if $state->{handle}; |
| 710 |
%$state = (); |
| 711 |
} |
| 712 |
|
| 713 |
sub _error(\%$$) { |
| 714 |
my ($state, $cb, $hdr) = @_; |
| 715 |
|
| 716 |
&_destroy_state ($state); |
| 717 |
|
| 718 |
$cb->(undef, $hdr); |
| 719 |
() |
| 720 |
} |
| 721 |
|
| 722 |
our %IDEMPOTENT = ( |
| 723 |
DELETE => 1, |
| 724 |
GET => 1, |
| 725 |
QUERY => 1, |
| 726 |
HEAD => 1, |
| 727 |
OPTIONS => 1, |
| 728 |
PUT => 1, |
| 729 |
TRACE => 1, |
| 730 |
|
| 731 |
ACL => 1, |
| 732 |
"BASELINE-CONTROL" => 1, |
| 733 |
BIND => 1, |
| 734 |
CHECKIN => 1, |
| 735 |
CHECKOUT => 1, |
| 736 |
COPY => 1, |
| 737 |
LABEL => 1, |
| 738 |
LINK => 1, |
| 739 |
MERGE => 1, |
| 740 |
MKACTIVITY => 1, |
| 741 |
MKCALENDAR => 1, |
| 742 |
MKCOL => 1, |
| 743 |
MKREDIRECTREF => 1, |
| 744 |
MKWORKSPACE => 1, |
| 745 |
MOVE => 1, |
| 746 |
ORDERPATCH => 1, |
| 747 |
PRI => 1, |
| 748 |
PROPFIND => 1, |
| 749 |
PROPPATCH => 1, |
| 750 |
REBIND => 1, |
| 751 |
REPORT => 1, |
| 752 |
SEARCH => 1, |
| 753 |
UNBIND => 1, |
| 754 |
UNCHECKOUT => 1, |
| 755 |
UNLINK => 1, |
| 756 |
UNLOCK => 1, |
| 757 |
UPDATE => 1, |
| 758 |
UPDATEREDIRECTREF => 1, |
| 759 |
"VERSION-CONTROL" => 1, |
| 760 |
); |
| 761 |
|
| 762 |
sub http_request($$@) { |
| 763 |
my $cb = pop; |
| 764 |
my ($method, $url, %arg) = @_; |
| 765 |
|
| 766 |
my %hdr; |
| 767 |
|
| 768 |
$arg{tls_ctx} = $TLS_CTX_LOW if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx}; |
| 769 |
$arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high"; |
| 770 |
|
| 771 |
$method = uc $method; |
| 772 |
|
| 773 |
if (my $hdr = $arg{headers}) { |
| 774 |
while (my ($k, $v) = each %$hdr) { |
| 775 |
$hdr{lc $k} = $v; |
| 776 |
} |
| 777 |
} |
| 778 |
|
| 779 |
# pseudo headers for all subsequent responses |
| 780 |
my @pseudo = (URL => $url); |
| 781 |
push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect}; |
| 782 |
|
| 783 |
my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE; |
| 784 |
|
| 785 |
return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" }) |
| 786 |
if $recurse < 0; |
| 787 |
|
| 788 |
my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY; |
| 789 |
my $timeout = $arg{timeout} || $TIMEOUT; |
| 790 |
|
| 791 |
my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment |
| 792 |
$url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|; |
| 793 |
|
| 794 |
$uscheme = lc $uscheme; |
| 795 |
|
| 796 |
my $uport = $uscheme eq "http" ? 80 |
| 797 |
: $uscheme eq "https" ? 443 |
| 798 |
: return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); |
| 799 |
|
| 800 |
$uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x |
| 801 |
or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); |
| 802 |
|
| 803 |
my $uhost = lc $1; |
| 804 |
$uport = $2 if defined $2; |
| 805 |
|
| 806 |
$hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" |
| 807 |
unless exists $hdr{host}; |
| 808 |
|
| 809 |
$uhost =~ s/^\[(.*)\]$/$1/; |
| 810 |
$upath .= $query if length $query; |
| 811 |
|
| 812 |
$upath =~ s%^/?%/%; |
| 813 |
|
| 814 |
# cookie processing |
| 815 |
if (my $jar = $arg{cookie_jar}) { |
| 816 |
my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath; |
| 817 |
|
| 818 |
$hdr{cookie} = join "; ", @$cookies |
| 819 |
if @$cookies; |
| 820 |
} |
| 821 |
|
| 822 |
my ($rhost, $rport, $rscheme, $rpath); # request host, port, path |
| 823 |
|
| 824 |
if ($proxy) { |
| 825 |
($rpath, $rhost, $rport, $rscheme) = ($url, @$proxy); |
| 826 |
|
| 827 |
$rscheme = "http" unless defined $rscheme; |
| 828 |
|
| 829 |
# don't support https requests over https-proxy transport, |
| 830 |
# can't be done with tls as spec'ed, unless you double-encrypt. |
| 831 |
$rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; |
| 832 |
|
| 833 |
$rhost = lc $rhost; |
| 834 |
$rscheme = lc $rscheme; |
| 835 |
} else { |
| 836 |
($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); |
| 837 |
} |
| 838 |
|
| 839 |
# leave out fragment and query string, just a heuristic |
| 840 |
$hdr{referer} = "$uscheme://$uauthority$upath" unless exists $hdr{referer}; |
| 841 |
$hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; |
| 842 |
|
| 843 |
$hdr{"content-length"} = length $arg{body} |
| 844 |
if length $arg{body} || $method ne "GET"; |
| 845 |
|
| 846 |
my $idempotent = $IDEMPOTENT{$method}; |
| 847 |
|
| 848 |
# default value for keepalive is true iff the request is for an idempotent method |
| 849 |
my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent; |
| 850 |
my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy; |
| 851 |
my $was_persistent; # true if this is actually a recycled connection |
| 852 |
|
| 853 |
# the key to use in the keepalive cache |
| 854 |
my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}"; |
| 855 |
|
| 856 |
$hdr{connection} = ($persistent ? $keepalive ? "keep-alive, " : "" : "close, ") . "Te"; #1.1 |
| 857 |
$hdr{te} = "trailers" unless exists $hdr{te}; #1.1 |
| 858 |
|
| 859 |
my %state = (connect_guard => 1); |
| 860 |
|
| 861 |
my $ae_error = 595; # connecting |
| 862 |
|
| 863 |
# handle actual, non-tunneled, request |
| 864 |
my $handle_actual_request = sub { |
| 865 |
$ae_error = 596; # request phase |
| 866 |
|
| 867 |
my $hdl = $state{handle}; |
| 868 |
|
| 869 |
$hdl->starttls ("connect") if $uscheme eq "https" && !exists $hdl->{tls}; |
| 870 |
|
| 871 |
# send request |
| 872 |
$hdl->push_write ( |
| 873 |
"$method $rpath HTTP/1.1\015\012" |
| 874 |
. (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) |
| 875 |
. "\015\012" |
| 876 |
. $arg{body} |
| 877 |
); |
| 878 |
|
| 879 |
# return if error occurred during push_write() |
| 880 |
return unless %state; |
| 881 |
|
| 882 |
# reduce memory usage, save a kitten, also re-use it for the response headers. |
| 883 |
%hdr = (); |
| 884 |
|
| 885 |
# status line and headers |
| 886 |
$state{read_response} = sub { |
| 887 |
return unless %state; |
| 888 |
|
| 889 |
for ("$_[1]") { |
| 890 |
y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
| 891 |
|
| 892 |
/^HTTP\/0*([0-9\.]+) \x20 ([0-9]{3}) (?: \x20 ([^\012]*) )? \012/gxci |
| 893 |
or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid server response" }; |
| 894 |
|
| 895 |
# 100 Continue handling |
| 896 |
# should not happen as we don't send expect: 100-continue, |
| 897 |
# but we handle it just in case. |
| 898 |
# since we send the request body regardless, if we get an error |
| 899 |
# we are out of-sync, which we currently do NOT handle correctly. |
| 900 |
return $state{handle}->push_read (line => $qr_nlnl, $state{read_response}) |
| 901 |
if $2 eq 100; |
| 902 |
|
| 903 |
push @pseudo, |
| 904 |
HTTPVersion => $1, |
| 905 |
Status => $2, |
| 906 |
Reason => $3, |
| 907 |
; |
| 908 |
|
| 909 |
my $hdr = _parse_hdr |
| 910 |
or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Garbled response headers" }; |
| 911 |
|
| 912 |
%hdr = (%$hdr, @pseudo); |
| 913 |
} |
| 914 |
|
| 915 |
# redirect handling |
| 916 |
# relative uri handling forced by microsoft and other shitheads. |
| 917 |
# we give our best and fall back to URI if available. |
| 918 |
if (exists $hdr{location}) { |
| 919 |
my $loc = $hdr{location}; |
| 920 |
|
| 921 |
if ($loc =~ m%^//%) { # // |
| 922 |
$loc = "$uscheme:$loc"; |
| 923 |
|
| 924 |
} elsif ($loc eq "") { |
| 925 |
$loc = $url; |
| 926 |
|
| 927 |
} elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple" |
| 928 |
$loc =~ s/^\.\/+//; |
| 929 |
|
| 930 |
if ($loc !~ m%^[.?#]%) { |
| 931 |
my $prefix = "$uscheme://$uauthority"; |
| 932 |
|
| 933 |
unless ($loc =~ s/^\///) { |
| 934 |
$prefix .= $upath; |
| 935 |
$prefix =~ s/\/[^\/]*$//; |
| 936 |
} |
| 937 |
|
| 938 |
$loc = "$prefix/$loc"; |
| 939 |
|
| 940 |
} elsif (eval { require URI }) { # uri |
| 941 |
$loc = URI->new_abs ($loc, $url)->as_string; |
| 942 |
|
| 943 |
} else { |
| 944 |
return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" }; |
| 945 |
#$hdr{Status} = 599; |
| 946 |
#$hdr{Reason} = "Unparsable Redirect (URI module missing)"; |
| 947 |
#$recurse = 0; |
| 948 |
} |
| 949 |
} |
| 950 |
|
| 951 |
$hdr{location} = $loc; |
| 952 |
} |
| 953 |
|
| 954 |
my $redirect; |
| 955 |
|
| 956 |
if ($recurse) { |
| 957 |
my $status = $hdr{Status}; |
| 958 |
|
| 959 |
# industry standard is to redirect POST as GET for |
| 960 |
# 301, 302 and 303, in contrast to HTTP/1.0 and 1.1. |
| 961 |
# also, the UA should ask the user for 301 and 307 and POST, |
| 962 |
# industry standard seems to be to simply follow. |
| 963 |
# we go with the industry standard. 308 is defined |
| 964 |
# by rfc7538 |
| 965 |
if ($status == 301 or $status == 302 or $status == 303) { |
| 966 |
$redirect = 1; |
| 967 |
# HTTP/1.1 is unclear on how to mutate the method |
| 968 |
unless ($method eq "HEAD") { |
| 969 |
$method = "GET"; |
| 970 |
delete $arg{body}; |
| 971 |
} |
| 972 |
} elsif ($status == 307 or $status == 308) { |
| 973 |
$redirect = 1; |
| 974 |
} |
| 975 |
} |
| 976 |
|
| 977 |
my $finish = sub { # ($data, $err_status, $err_reason[, $persistent]) |
| 978 |
if ($state{handle}) { |
| 979 |
# handle keepalive |
| 980 |
if ( |
| 981 |
$persistent |
| 982 |
&& $_[3] |
| 983 |
&& ($hdr{HTTPVersion} < 1.1 |
| 984 |
? $hdr{connection} =~ /\bkeep-?alive\b/i |
| 985 |
: $hdr{connection} !~ /\bclose\b/i) |
| 986 |
) { |
| 987 |
ka_store $ka_key, delete $state{handle}; |
| 988 |
} else { |
| 989 |
# no keepalive, destroy the handle |
| 990 |
$state{handle}->destroy; |
| 991 |
} |
| 992 |
} |
| 993 |
|
| 994 |
%state = (); |
| 995 |
|
| 996 |
if (defined $_[1]) { |
| 997 |
$hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1]; |
| 998 |
$hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; |
| 999 |
} |
| 1000 |
|
| 1001 |
# set-cookie processing |
| 1002 |
if ($arg{cookie_jar}) { |
| 1003 |
cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date}; |
| 1004 |
} |
| 1005 |
|
| 1006 |
if ($redirect && exists $hdr{location}) { |
| 1007 |
# we ignore any errors, as it is very common to receive |
| 1008 |
# Content-Length != 0 but no actual body |
| 1009 |
# we also access %hdr, as $_[1] might be an erro |
| 1010 |
$state{recurse} = |
| 1011 |
http_request ( |
| 1012 |
$method => $hdr{location}, |
| 1013 |
%arg, |
| 1014 |
recurse => $recurse - 1, |
| 1015 |
Redirect => [$_[0], \%hdr], |
| 1016 |
sub { |
| 1017 |
%state = (); |
| 1018 |
&$cb |
| 1019 |
}, |
| 1020 |
); |
| 1021 |
} else { |
| 1022 |
$cb->($_[0], \%hdr); |
| 1023 |
} |
| 1024 |
}; |
| 1025 |
|
| 1026 |
$ae_error = 597; # body phase |
| 1027 |
|
| 1028 |
my $chunked = $hdr{"transfer-encoding"} =~ /\bchunked\b/i; # not quite correct... |
| 1029 |
|
| 1030 |
my $len = $chunked ? undef : $hdr{"content-length"}; |
| 1031 |
|
| 1032 |
# body handling, many different code paths |
| 1033 |
# - no body expected |
| 1034 |
# - want_body_handle |
| 1035 |
# - te chunked |
| 1036 |
# - 2x length known (with or without on_body) |
| 1037 |
# - 2x length not known (with or without on_body) |
| 1038 |
if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
| 1039 |
$finish->(undef, 598 => "Request cancelled by on_header"); |
| 1040 |
} elsif ( |
| 1041 |
$hdr{Status} =~ /^(?:1..|204|205|304)$/ |
| 1042 |
or $method eq "HEAD" |
| 1043 |
or (defined $len && $len == 0) # == 0, not !, because "0 " is true |
| 1044 |
) { |
| 1045 |
# no body |
| 1046 |
$finish->("", undef, undef, 1); |
| 1047 |
|
| 1048 |
} elsif (!$redirect && $arg{want_body_handle}) { |
| 1049 |
$_[0]->on_eof (undef); |
| 1050 |
$_[0]->on_error (undef); |
| 1051 |
$_[0]->on_read (undef); |
| 1052 |
|
| 1053 |
$finish->(delete $state{handle}); |
| 1054 |
|
| 1055 |
} elsif ($chunked) { |
| 1056 |
my $cl = 0; |
| 1057 |
my $body = ""; |
| 1058 |
my $on_body = (!$redirect && $arg{on_body}) || sub { $body .= shift; 1 }; |
| 1059 |
|
| 1060 |
$state{read_chunk} = sub { |
| 1061 |
$_[1] =~ /^([0-9a-fA-F]+)/ |
| 1062 |
or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
| 1063 |
|
| 1064 |
my $len = hex $1; |
| 1065 |
|
| 1066 |
if ($len) { |
| 1067 |
$cl += $len; |
| 1068 |
|
| 1069 |
$_[0]->push_read (chunk => $len, sub { |
| 1070 |
$on_body->($_[1], \%hdr) |
| 1071 |
or return $finish->(undef, 598 => "Request cancelled by on_body"); |
| 1072 |
|
| 1073 |
$_[0]->push_read (line => sub { |
| 1074 |
length $_[1] |
| 1075 |
and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
| 1076 |
$_[0]->push_read (line => $state{read_chunk}); |
| 1077 |
}); |
| 1078 |
}); |
| 1079 |
} else { |
| 1080 |
$hdr{"content-length"} ||= $cl; |
| 1081 |
|
| 1082 |
$_[0]->push_read (line => $qr_nlnl, sub { |
| 1083 |
if (length $_[1]) { |
| 1084 |
for ("$_[1]") { |
| 1085 |
y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
| 1086 |
|
| 1087 |
my $hdr = _parse_hdr |
| 1088 |
or return $finish->(undef, $ae_error => "Garbled response trailers"); |
| 1089 |
|
| 1090 |
%hdr = (%hdr, %$hdr); |
| 1091 |
} |
| 1092 |
} |
| 1093 |
|
| 1094 |
$finish->($body, undef, undef, 1); |
| 1095 |
}); |
| 1096 |
} |
| 1097 |
}; |
| 1098 |
|
| 1099 |
$_[0]->push_read (line => $state{read_chunk}); |
| 1100 |
|
| 1101 |
} elsif (!$redirect && $arg{on_body}) { |
| 1102 |
if (defined $len) { |
| 1103 |
$_[0]->on_read (sub { |
| 1104 |
$len -= length $_[0]{rbuf}; |
| 1105 |
|
| 1106 |
$arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
| 1107 |
or return $finish->(undef, 598 => "Request cancelled by on_body"); |
| 1108 |
|
| 1109 |
$len > 0 |
| 1110 |
or $finish->("", undef, undef, 1); |
| 1111 |
}); |
| 1112 |
} else { |
| 1113 |
$_[0]->on_eof (sub { |
| 1114 |
$finish->(""); |
| 1115 |
}); |
| 1116 |
$_[0]->on_read (sub { |
| 1117 |
$arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
| 1118 |
or $finish->(undef, 598 => "Request cancelled by on_body"); |
| 1119 |
}); |
| 1120 |
} |
| 1121 |
} else { |
| 1122 |
$_[0]->on_eof (undef); |
| 1123 |
|
| 1124 |
if (defined $len) { |
| 1125 |
$_[0]->on_read (sub { |
| 1126 |
$finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
| 1127 |
if $len <= length $_[0]{rbuf}; |
| 1128 |
}); |
| 1129 |
} else { |
| 1130 |
$_[0]->on_error (sub { |
| 1131 |
($! == Errno::EPIPE || !$!) |
| 1132 |
? $finish->(delete $_[0]{rbuf}) |
| 1133 |
: $finish->(undef, $ae_error => $_[2]); |
| 1134 |
}); |
| 1135 |
$_[0]->on_read (sub { }); |
| 1136 |
} |
| 1137 |
} |
| 1138 |
}; |
| 1139 |
|
| 1140 |
# if keepalive is enabled, then the server closing the connection |
| 1141 |
# before a response can happen legally - we retry on idempotent methods. |
| 1142 |
if ($was_persistent && $idempotent) { |
| 1143 |
my $old_eof = $hdl->{on_eof}; |
| 1144 |
$hdl->{on_eof} = sub { |
| 1145 |
_destroy_state %state; |
| 1146 |
|
| 1147 |
%state = (); |
| 1148 |
$state{recurse} = |
| 1149 |
http_request ( |
| 1150 |
$method => $url, |
| 1151 |
%arg, |
| 1152 |
recurse => $recurse - 1, |
| 1153 |
persistent => 0, |
| 1154 |
sub { |
| 1155 |
%state = (); |
| 1156 |
&$cb |
| 1157 |
} |
| 1158 |
); |
| 1159 |
}; |
| 1160 |
$hdl->on_read (sub { |
| 1161 |
return unless %state; |
| 1162 |
|
| 1163 |
# as soon as we receive something, a connection close |
| 1164 |
# once more becomes a hard error |
| 1165 |
$hdl->{on_eof} = $old_eof; |
| 1166 |
$hdl->push_read (line => $qr_nlnl, $state{read_response}); |
| 1167 |
}); |
| 1168 |
} else { |
| 1169 |
$hdl->push_read (line => $qr_nlnl, $state{read_response}); |
| 1170 |
} |
| 1171 |
}; |
| 1172 |
|
| 1173 |
my $prepare_handle = sub { |
| 1174 |
my ($hdl) = $state{handle}; |
| 1175 |
|
| 1176 |
$hdl->on_error (sub { |
| 1177 |
_error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] }; |
| 1178 |
}); |
| 1179 |
$hdl->on_eof (sub { |
| 1180 |
_error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }; |
| 1181 |
}); |
| 1182 |
$hdl->timeout_reset; |
| 1183 |
$hdl->timeout ($timeout); |
| 1184 |
}; |
| 1185 |
|
| 1186 |
# connected to proxy (or origin server) |
| 1187 |
my $connect_cb = sub { |
| 1188 |
my $fh = shift |
| 1189 |
or return _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "$!" }; |
| 1190 |
|
| 1191 |
return unless delete $state{connect_guard}; |
| 1192 |
|
| 1193 |
# get handle |
| 1194 |
$state{handle} = new AnyEvent::Handle |
| 1195 |
%{ $arg{handle_params} }, |
| 1196 |
fh => $fh, |
| 1197 |
peername => $uhost, |
| 1198 |
tls_ctx => $arg{tls_ctx}, |
| 1199 |
; |
| 1200 |
|
| 1201 |
$prepare_handle->(); |
| 1202 |
|
| 1203 |
#$state{handle}->starttls ("connect") if $rscheme eq "https"; |
| 1204 |
|
| 1205 |
# now handle proxy-CONNECT method |
| 1206 |
if ($proxy && $uscheme eq "https") { |
| 1207 |
# oh dear, we have to wrap it into a connect request |
| 1208 |
|
| 1209 |
my $auth = exists $hdr{"proxy-authorization"} |
| 1210 |
? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012" |
| 1211 |
: ""; |
| 1212 |
|
| 1213 |
# maybe re-use $uauthority with patched port? |
| 1214 |
$state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012"); |
| 1215 |
$state{handle}->push_read (line => $qr_nlnl, sub { |
| 1216 |
$_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix |
| 1217 |
or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }; |
| 1218 |
|
| 1219 |
if ($2 == 200) { |
| 1220 |
$rpath = $upath; |
| 1221 |
$handle_actual_request->(); |
| 1222 |
} else { |
| 1223 |
_error %state, $cb, { @pseudo, Status => $2, Reason => $3 }; |
| 1224 |
} |
| 1225 |
}); |
| 1226 |
} else { |
| 1227 |
delete $hdr{"proxy-authorization"} unless $proxy; |
| 1228 |
|
| 1229 |
$handle_actual_request->(); |
| 1230 |
} |
| 1231 |
}; |
| 1232 |
|
| 1233 |
_get_slot $uhost, sub { |
| 1234 |
$state{slot_guard} = shift; |
| 1235 |
|
| 1236 |
return unless $state{connect_guard}; |
| 1237 |
|
| 1238 |
# try to use an existing keepalive connection, but only if we, ourselves, plan |
| 1239 |
# on a keepalive request (in theory, this should be a separate config option). |
| 1240 |
if ($persistent && $KA_CACHE{$ka_key}) { |
| 1241 |
$was_persistent = 1; |
| 1242 |
|
| 1243 |
$state{handle} = ka_fetch $ka_key; |
| 1244 |
# $state{handle}->destroyed |
| 1245 |
# and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d# |
| 1246 |
$prepare_handle->(); |
| 1247 |
# $state{handle}->destroyed |
| 1248 |
# and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d# |
| 1249 |
$rpath = $upath; |
| 1250 |
$handle_actual_request->(); |
| 1251 |
|
| 1252 |
} else { |
| 1253 |
my $tcp_connect = $arg{tcp_connect} |
| 1254 |
|| do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; |
| 1255 |
|
| 1256 |
$state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); |
| 1257 |
} |
| 1258 |
}; |
| 1259 |
|
| 1260 |
defined wantarray && AnyEvent::Util::guard { _destroy_state %state } |
| 1261 |
} |
| 1262 |
|
| 1263 |
sub http_get($@) { |
| 1264 |
unshift @_, "GET"; |
| 1265 |
&http_request |
| 1266 |
} |
| 1267 |
|
| 1268 |
sub http_head($@) { |
| 1269 |
unshift @_, "HEAD"; |
| 1270 |
&http_request |
| 1271 |
} |
| 1272 |
|
| 1273 |
sub http_post($$@) { |
| 1274 |
my $url = shift; |
| 1275 |
unshift @_, "POST", $url, "body"; |
| 1276 |
&http_request |
| 1277 |
} |
| 1278 |
|
| 1279 |
=back |
| 1280 |
|
| 1281 |
=head2 DNS CACHING |
| 1282 |
|
| 1283 |
AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for |
| 1284 |
the actual connection, which in turn uses AnyEvent::DNS to resolve |
| 1285 |
hostnames. The latter is a simple stub resolver and does no caching |
| 1286 |
on its own. If you want DNS caching, you currently have to provide |
| 1287 |
your own default resolver (by storing a suitable resolver object in |
| 1288 |
C<$AnyEvent::DNS::RESOLVER>) or your own C<tcp_connect> callback. |
| 1289 |
|
| 1290 |
=head2 GLOBAL FUNCTIONS AND VARIABLES |
| 1291 |
|
| 1292 |
=over 4 |
| 1293 |
|
| 1294 |
=item AnyEvent::HTTP::set_proxy "proxy-url" |
| 1295 |
|
| 1296 |
Sets the default proxy server to use. The proxy-url must begin with a |
| 1297 |
string of the form C<http://host:port>, croaks otherwise. |
| 1298 |
|
| 1299 |
To clear an already-set proxy, use C<undef>. |
| 1300 |
|
| 1301 |
When AnyEvent::HTTP is loaded for the first time it will query the |
| 1302 |
default proxy from the operating system, currently by looking at |
| 1303 |
C<$ENV{http_proxy>}. |
| 1304 |
|
| 1305 |
=item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end] |
| 1306 |
|
| 1307 |
Remove all cookies from the cookie jar that have been expired. If |
| 1308 |
C<$session_end> is given and true, then additionally remove all session |
| 1309 |
cookies. |
| 1310 |
|
| 1311 |
You should call this function (with a true C<$session_end>) before you |
| 1312 |
save cookies to disk, and you should call this function after loading them |
| 1313 |
again. If you have a long-running program you can additionally call this |
| 1314 |
function from time to time. |
| 1315 |
|
| 1316 |
A cookie jar is initially an empty hash-reference that is managed by this |
| 1317 |
module. Its format is subject to change, but currently it is as follows: |
| 1318 |
|
| 1319 |
The key C<version> has to contain C<2>, otherwise the hash gets |
| 1320 |
cleared. All other keys are hostnames or IP addresses pointing to |
| 1321 |
hash-references. The key for these inner hash references is the |
| 1322 |
server path for which this cookie is meant, and the values are again |
| 1323 |
hash-references. Each key of those hash-references is a cookie name, and |
| 1324 |
the value, you guessed it, is another hash-reference, this time with the |
| 1325 |
key-value pairs from the cookie, except for C<expires> and C<max-age>, |
| 1326 |
which have been replaced by a C<_expires> key that contains the cookie |
| 1327 |
expiry timestamp. Session cookies are indicated by not having an |
| 1328 |
C<_expires> key. |
| 1329 |
|
| 1330 |
Here is an example of a cookie jar with a single cookie, so you have a |
| 1331 |
chance of understanding the above paragraph: |
| 1332 |
|
| 1333 |
{ |
| 1334 |
version => 2, |
| 1335 |
"10.0.0.1" => { |
| 1336 |
"/" => { |
| 1337 |
"mythweb_id" => { |
| 1338 |
_expires => 1293917923, |
| 1339 |
value => "ooRung9dThee3ooyXooM1Ohm", |
| 1340 |
}, |
| 1341 |
}, |
| 1342 |
}, |
| 1343 |
} |
| 1344 |
|
| 1345 |
=item $date = AnyEvent::HTTP::format_date $timestamp |
| 1346 |
|
| 1347 |
Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP |
| 1348 |
Date (RFC 2616). |
| 1349 |
|
| 1350 |
=item $timestamp = AnyEvent::HTTP::parse_date $date |
| 1351 |
|
| 1352 |
Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a |
| 1353 |
bunch of minor variations of those, and returns the corresponding POSIX |
| 1354 |
timestamp, or C<undef> if the date cannot be parsed. |
| 1355 |
|
| 1356 |
=item $AnyEvent::HTTP::MAX_RECURSE |
| 1357 |
|
| 1358 |
The default value for the C<recurse> request parameter (default: C<10>). |
| 1359 |
|
| 1360 |
=item $AnyEvent::HTTP::TIMEOUT |
| 1361 |
|
| 1362 |
The default timeout for connection operations (default: C<300>). |
| 1363 |
|
| 1364 |
=item $AnyEvent::HTTP::USERAGENT |
| 1365 |
|
| 1366 |
The default value for the C<User-Agent> header (the default is |
| 1367 |
C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>). |
| 1368 |
|
| 1369 |
=item $AnyEvent::HTTP::MAX_PER_HOST |
| 1370 |
|
| 1371 |
The maximum number of concurrent connections to the same host (identified |
| 1372 |
by the hostname). If the limit is exceeded, then additional requests |
| 1373 |
are queued until previous connections are closed. Both persistent and |
| 1374 |
non-persistent connections are counted in this limit. |
| 1375 |
|
| 1376 |
The default value for this is C<4>, and it is highly advisable to not |
| 1377 |
increase it much. |
| 1378 |
|
| 1379 |
For comparison: the RFC's recommend 4 non-persistent or 2 persistent |
| 1380 |
connections, older browsers used 2, newer ones (such as firefox 3) |
| 1381 |
typically use 6, and Opera uses 8 because like, they have the fastest |
| 1382 |
browser and give a shit for everybody else on the planet. |
| 1383 |
|
| 1384 |
=item $AnyEvent::HTTP::PERSISTENT_TIMEOUT |
| 1385 |
|
| 1386 |
The time after which idle persistent connections get closed by |
| 1387 |
AnyEvent::HTTP (default: C<3>). |
| 1388 |
|
| 1389 |
=item $AnyEvent::HTTP::ACTIVE |
| 1390 |
|
| 1391 |
The number of active connections. This is not the number of currently |
| 1392 |
running requests, but the number of currently open and non-idle TCP |
| 1393 |
connections. This number can be useful for load-leveling. |
| 1394 |
|
| 1395 |
=back |
| 1396 |
|
| 1397 |
=cut |
| 1398 |
|
| 1399 |
our @month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); |
| 1400 |
our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat); |
| 1401 |
|
| 1402 |
sub format_date($) { |
| 1403 |
my ($time) = @_; |
| 1404 |
|
| 1405 |
# RFC 822/1123 format |
| 1406 |
my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time; |
| 1407 |
|
| 1408 |
sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT", |
| 1409 |
$weekday[$wday], $mday, $month[$mon], $year + 1900, |
| 1410 |
$H, $M, $S; |
| 1411 |
} |
| 1412 |
|
| 1413 |
sub parse_date($) { |
| 1414 |
my ($date) = @_; |
| 1415 |
|
| 1416 |
my ($d, $m, $y, $H, $M, $S); |
| 1417 |
|
| 1418 |
if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
| 1419 |
# RFC 822/1123, required by RFC 2616 (with " ") |
| 1420 |
# cookie dates (with "-") |
| 1421 |
|
| 1422 |
($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); |
| 1423 |
|
| 1424 |
} elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
| 1425 |
# RFC 850 |
| 1426 |
($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); |
| 1427 |
|
| 1428 |
} elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) { |
| 1429 |
# ISO C's asctime |
| 1430 |
($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); |
| 1431 |
} |
| 1432 |
# other formats fail in the loop below |
| 1433 |
|
| 1434 |
for (0..11) { |
| 1435 |
if ($m eq $month[$_]) { |
| 1436 |
require Time::Local; |
| 1437 |
return eval { Time::Local::timegm ($S, $M, $H, $d, $_, $y) }; |
| 1438 |
} |
| 1439 |
} |
| 1440 |
|
| 1441 |
undef |
| 1442 |
} |
| 1443 |
|
| 1444 |
sub set_proxy($) { |
| 1445 |
if (length $_[0]) { |
| 1446 |
$_[0] =~ m%^(http):// ([^:/]+) (?: : (\d*) )?%ix |
| 1447 |
or Carp::croak "$_[0]: invalid proxy URL"; |
| 1448 |
$PROXY = [$2, $3 || 3128, $1] |
| 1449 |
} else { |
| 1450 |
undef $PROXY; |
| 1451 |
} |
| 1452 |
} |
| 1453 |
|
| 1454 |
# initialise proxy from environment |
| 1455 |
eval { |
| 1456 |
set_proxy $ENV{http_proxy}; |
| 1457 |
}; |
| 1458 |
|
| 1459 |
=head2 SHOWCASE |
| 1460 |
|
| 1461 |
This section contains some more elaborate "real-world" examples or code |
| 1462 |
snippets. |
| 1463 |
|
| 1464 |
=head2 HTTP/1.1 FILE DOWNLOAD |
| 1465 |
|
| 1466 |
Downloading files with HTTP can be quite tricky, especially when something |
| 1467 |
goes wrong and you want to resume. |
| 1468 |
|
| 1469 |
Here is a function that initiates and resumes a download. It uses the |
| 1470 |
last modified time to check for file content changes, and works with many |
| 1471 |
HTTP/1.0 servers as well, and usually falls back to a complete re-download |
| 1472 |
on older servers. |
| 1473 |
|
| 1474 |
It calls the completion callback with either C<undef>, which means a |
| 1475 |
nonretryable error occurred, C<0> when the download was partial and should |
| 1476 |
be retried, and C<1> if it was successful. |
| 1477 |
|
| 1478 |
use AnyEvent::HTTP; |
| 1479 |
|
| 1480 |
sub download($$$) { |
| 1481 |
my ($url, $file, $cb) = @_; |
| 1482 |
|
| 1483 |
open my $fh, "+<", $file |
| 1484 |
or die "$file: $!"; |
| 1485 |
|
| 1486 |
my %hdr; |
| 1487 |
my $ofs = 0; |
| 1488 |
|
| 1489 |
if (stat $fh and -s _) { |
| 1490 |
$ofs = -s _; |
| 1491 |
warn "-s is ", $ofs; |
| 1492 |
$hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9]; |
| 1493 |
$hdr{"range"} = "bytes=$ofs-"; |
| 1494 |
} |
| 1495 |
|
| 1496 |
http_get $url, |
| 1497 |
headers => \%hdr, |
| 1498 |
on_header => sub { |
| 1499 |
my ($hdr) = @_; |
| 1500 |
|
| 1501 |
if ($hdr->{Status} == 200 && $ofs) { |
| 1502 |
# resume failed |
| 1503 |
truncate $fh, $ofs = 0; |
| 1504 |
} |
| 1505 |
|
| 1506 |
sysseek $fh, $ofs, 0; |
| 1507 |
|
| 1508 |
1 |
| 1509 |
}, |
| 1510 |
on_body => sub { |
| 1511 |
my ($data, $hdr) = @_; |
| 1512 |
|
| 1513 |
if ($hdr->{Status} =~ /^2/) { |
| 1514 |
length $data == syswrite $fh, $data |
| 1515 |
or return; # abort on write errors |
| 1516 |
} |
| 1517 |
|
| 1518 |
1 |
| 1519 |
}, |
| 1520 |
sub { |
| 1521 |
my (undef, $hdr) = @_; |
| 1522 |
|
| 1523 |
my $status = $hdr->{Status}; |
| 1524 |
|
| 1525 |
if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) { |
| 1526 |
utime $time, $time, $fh; |
| 1527 |
} |
| 1528 |
|
| 1529 |
if ($status == 200 || $status == 206 || $status == 416) { |
| 1530 |
# download ok || resume ok || file already fully downloaded |
| 1531 |
$cb->(1, $hdr); |
| 1532 |
|
| 1533 |
} elsif ($status == 412) { |
| 1534 |
# file has changed while resuming, delete and retry |
| 1535 |
unlink $file; |
| 1536 |
$cb->(0, $hdr); |
| 1537 |
|
| 1538 |
} elsif ($status == 500 or $status == 503 or $status =~ /^59/) { |
| 1539 |
# retry later |
| 1540 |
$cb->(0, $hdr); |
| 1541 |
|
| 1542 |
} else { |
| 1543 |
$cb->(undef, $hdr); |
| 1544 |
} |
| 1545 |
} |
| 1546 |
; |
| 1547 |
} |
| 1548 |
|
| 1549 |
download "http://server/somelargefile", "/tmp/somelargefile", sub { |
| 1550 |
if ($_[0]) { |
| 1551 |
print "OK!\n"; |
| 1552 |
} elsif (defined $_[0]) { |
| 1553 |
print "please retry later\n"; |
| 1554 |
} else { |
| 1555 |
print "ERROR\n"; |
| 1556 |
} |
| 1557 |
}; |
| 1558 |
|
| 1559 |
=head3 SOCKS PROXIES |
| 1560 |
|
| 1561 |
Socks proxies are not directly supported by AnyEvent::HTTP. You can |
| 1562 |
compile your perl to support socks, or use an external program such as |
| 1563 |
F<socksify> (dante) or F<tsocks> to make your program use a socks proxy |
| 1564 |
transparently. |
| 1565 |
|
| 1566 |
Alternatively, for AnyEvent::HTTP only, you can use your own |
| 1567 |
C<tcp_connect> function that does the proxy handshake - here is an example |
| 1568 |
that works with socks4a proxies: |
| 1569 |
|
| 1570 |
use Errno; |
| 1571 |
use AnyEvent::Util; |
| 1572 |
use AnyEvent::Socket; |
| 1573 |
use AnyEvent::Handle; |
| 1574 |
|
| 1575 |
# host, port and username of/for your socks4a proxy |
| 1576 |
my $socks_host = "10.0.0.23"; |
| 1577 |
my $socks_port = 9050; |
| 1578 |
my $socks_user = ""; |
| 1579 |
|
| 1580 |
sub socks4a_connect { |
| 1581 |
my ($host, $port, $connect_cb, $prepare_cb) = @_; |
| 1582 |
|
| 1583 |
my $hdl = new AnyEvent::Handle |
| 1584 |
connect => [$socks_host, $socks_port], |
| 1585 |
on_prepare => sub { $prepare_cb->($_[0]{fh}) }, |
| 1586 |
on_error => sub { $connect_cb->() }, |
| 1587 |
; |
| 1588 |
|
| 1589 |
# we send ipv4 connects as socks4, ipv6 and dns as socks4a |
| 1590 |
my $ipn = AnyEvent::Socket::parse_ipv4 $host; |
| 1591 |
$hdl->push_write ( |
| 1592 |
$ipn ? pack "CCn a4 Z*" , 4, 1, $port, $ipn, $socks_user |
| 1593 |
: pack "CCn N Z*Z*", 4, 1, $port, 1, $socks_user, $host |
| 1594 |
); |
| 1595 |
|
| 1596 |
$hdl->push_read (chunk => 8, sub { |
| 1597 |
my ($hdl, $chunk) = @_; |
| 1598 |
my ($status, $port, $ipn) = unpack "xCna4", $chunk; |
| 1599 |
|
| 1600 |
if ($status == 0x5a) { |
| 1601 |
$connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port"); |
| 1602 |
} else { |
| 1603 |
$! = Errno::ENXIO; $connect_cb->(); |
| 1604 |
} |
| 1605 |
}); |
| 1606 |
|
| 1607 |
$hdl |
| 1608 |
} |
| 1609 |
|
| 1610 |
Use C<socks4a_connect> instead of C<tcp_connect> when doing C<http_request>s, |
| 1611 |
possibly after switching off other proxy types: |
| 1612 |
|
| 1613 |
AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies |
| 1614 |
|
| 1615 |
http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub { |
| 1616 |
my ($data, $headers) = @_; |
| 1617 |
... |
| 1618 |
}; |
| 1619 |
|
| 1620 |
=head1 SEE ALSO |
| 1621 |
|
| 1622 |
L<AnyEvent>. |
| 1623 |
|
| 1624 |
=head1 AUTHOR |
| 1625 |
|
| 1626 |
Marc Lehmann <schmorp@schmorp.de> |
| 1627 |
http://home.schmorp.de/ |
| 1628 |
|
| 1629 |
With many thanks to Дмитрий Шалашов, who provided countless |
| 1630 |
testcases and bugreports. |
| 1631 |
|
| 1632 |
=cut |
| 1633 |
|
| 1634 |
1 |
| 1635 |
|