ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.134
Committed: Fri Sep 7 22:11:31 2018 UTC (5 years, 8 months ago) by root
Branch: MAIN
Changes since 1.133: +6 -0 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4    
5     =head1 SYNOPSIS
6    
7     use AnyEvent::HTTP;
8    
9 root 1.17 http_get "http://www.nethype.de/", sub { print $_[1] };
10    
11     # ... do something else here
12    
13 root 1.1 =head1 DESCRIPTION
14    
15     This module is an L<AnyEvent> user, you need to make sure that you use and
16     run a supported event loop.
17    
18 root 1.11 This module implements a simple, stateless and non-blocking HTTP
19     client. It supports GET, POST and other request methods, cookies and more,
20 root 1.94 all on a very low level. It can follow redirects, supports proxies, and
21 root 1.11 automatically limits the number of connections to the values specified in
22     the RFC.
23    
24     It should generally be a "good client" that is enough for most HTTP
25     tasks. Simple tasks should be simple, but complex tasks should still be
26     possible as the user retains control over request and response headers.
27    
28     The caller is responsible for authentication management, cookies (if
29     the simplistic implementation in this module doesn't suffice), referer
30     and other high-level protocol details for which this module offers only
31     limited support.
32    
33 root 1.1 =head2 METHODS
34    
35     =over 4
36    
37     =cut
38    
39     package AnyEvent::HTTP;
40    
41 root 1.85 use common::sense;
42 root 1.1
43 root 1.41 use Errno ();
44 root 1.1
45 root 1.51 use AnyEvent 5.0 ();
46 root 1.1 use AnyEvent::Util ();
47     use AnyEvent::Handle ();
48    
49     use base Exporter::;
50    
51 root 1.130 our $VERSION = 2.24;
52 root 1.1
53 root 1.17 our @EXPORT = qw(http_get http_post http_head http_request);
54 root 1.1
55 root 1.40 our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
56 root 1.3 our $MAX_RECURSE = 10;
57 root 1.92 our $PERSISTENT_TIMEOUT = 3;
58 root 1.2 our $TIMEOUT = 300;
59 root 1.92 our $MAX_PER_HOST = 4; # changing this is evil
60 root 1.1
61 root 1.2 our $PROXY;
62 root 1.14 our $ACTIVE = 0;
63 root 1.2
64 root 1.92 my %KA_CACHE; # indexed by uhost currently, points to [$handle...] array
65 root 1.11 my %CO_SLOT; # number of open connections, and wait queue, per host
66 root 1.1
67     =item http_get $url, key => value..., $cb->($data, $headers)
68    
69     Executes an HTTP-GET request. See the http_request function for details on
70 root 1.29 additional parameters and the return value.
71 root 1.1
72 root 1.5 =item http_head $url, key => value..., $cb->($data, $headers)
73    
74 root 1.29 Executes an HTTP-HEAD request. See the http_request function for details
75     on additional parameters and the return value.
76 root 1.5
77     =item http_post $url, $body, key => value..., $cb->($data, $headers)
78 root 1.3
79 root 1.26 Executes an HTTP-POST request with a request body of C<$body>. See the
80 root 1.29 http_request function for details on additional parameters and the return
81     value.
82 root 1.3
83 root 1.1 =item http_request $method => $url, key => value..., $cb->($data, $headers)
84    
85     Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
86     must be an absolute http or https URL.
87    
88 root 1.29 When called in void context, nothing is returned. In other contexts,
89     C<http_request> returns a "cancellation guard" - you have to keep the
90     object at least alive until the callback get called. If the object gets
91 root 1.58 destroyed before the callback is called, the request will be cancelled.
92 root 1.29
93 root 1.42 The callback will be called with the response body data as first argument
94 root 1.117 (or C<undef> if an error occurred), and a hash-ref with response headers
95 root 1.68 (and trailers) as second argument.
96 root 1.2
97 root 1.7 All the headers in that hash are lowercased. In addition to the response
98 root 1.55 headers, the "pseudo-headers" (uppercase to avoid clashing with possible
99     response headers) C<HTTPVersion>, C<Status> and C<Reason> contain the
100 root 1.64 three parts of the HTTP Status-Line of the same name. If an error occurs
101     during the body phase of a request, then the original C<Status> and
102     C<Reason> values from the header are available as C<OrigStatus> and
103     C<OrigReason>.
104 root 1.55
105     The pseudo-header C<URL> contains the actual URL (which can differ from
106     the requested URL when following redirects - for example, you might get
107     an error that your URL scheme is not supported even though your URL is a
108     valid http URL because it redirected to an ftp URL, in which case you can
109     look at the URL pseudo header).
110    
111     The pseudo-header C<Redirect> only exists when the request was a result
112     of an internal redirect. In that case it is an array reference with
113     the C<($data, $headers)> from the redirect response. Note that this
114     response could in turn be the result of a redirect itself, and C<<
115     $headers->{Redirect}[1]{Redirect} >> will then contain the original
116     response, and so on.
117 root 1.20
118 root 1.32 If the server sends a header multiple times, then their contents will be
119     joined together with a comma (C<,>), as per the HTTP spec.
120 root 1.2
121     If an internal error occurs, such as not being able to resolve a hostname,
122 root 1.77 then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
123     C<590>-C<599> and the C<Reason> pseudo-header will contain an error
124     message. Currently the following status codes are used:
125    
126     =over 4
127    
128 root 1.114 =item 595 - errors during connection establishment, proxy handshake.
129 root 1.77
130     =item 596 - errors during TLS negotiation, request sending and header processing.
131    
132 root 1.78 =item 597 - errors during body receiving or processing.
133 root 1.77
134 root 1.78 =item 598 - user aborted request via C<on_header> or C<on_body>.
135 root 1.77
136     =item 599 - other, usually nonretryable, errors (garbled URL etc.).
137    
138     =back
139 root 1.2
140 root 1.6 A typical callback might look like this:
141    
142     sub {
143     my ($body, $hdr) = @_;
144    
145     if ($hdr->{Status} =~ /^2/) {
146     ... everything should be ok
147     } else {
148     print "error, $hdr->{Status} $hdr->{Reason}\n";
149     }
150     }
151    
152 root 1.1 Additional parameters are key-value pairs, and are fully optional. They
153     include:
154    
155     =over 4
156    
157 root 1.3 =item recurse => $count (default: $MAX_RECURSE)
158 root 1.1
159 root 1.112 Whether to recurse requests or not, e.g. on redirects, authentication and
160     other retries and so on, and how often to do so.
161 root 1.1
162 root 1.119 Only redirects to http and https URLs are supported. While most common
163     redirection forms are handled entirely within this module, some require
164     the use of the optional L<URI> module. If it is required but missing, then
165     the request will fail with an error.
166    
167 root 1.1 =item headers => hashref
168    
169 root 1.68 The request headers to use. Currently, C<http_request> may provide its own
170     C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers and
171 root 1.69 will provide defaults at least for C<TE:>, C<Referer:> and C<User-Agent:>
172     (this can be suppressed by using C<undef> for these headers in which case
173     they won't be sent at all).
174 root 1.1
175 root 1.90 You really should provide your own C<User-Agent:> header value that is
176     appropriate for your program - I wouldn't be surprised if the default
177     AnyEvent string gets blocked by webservers sooner or later.
178    
179 root 1.104 Also, make sure that your headers names and values do not contain any
180     embedded newlines.
181    
182 root 1.1 =item timeout => $seconds
183    
184     The time-out to use for various stages - each connect attempt will reset
185 root 1.51 the timeout, as will read or write activity, i.e. this is not an overall
186     timeout.
187    
188     Default timeout is 5 minutes.
189 root 1.2
190     =item proxy => [$host, $port[, $scheme]] or undef
191    
192 root 1.102 Use the given http proxy for all requests, or no proxy if C<undef> is
193     used.
194 root 1.2
195 root 1.92 C<$scheme> must be either missing or must be C<http> for HTTP.
196 root 1.1
197 root 1.102 If not specified, then the default proxy is used (see
198     C<AnyEvent::HTTP::set_proxy>).
199    
200 root 1.123 Currently, if your proxy requires authorization, you have to specify an
201     appropriate "Proxy-Authorization" header in every request.
202    
203 root 1.3 =item body => $string
204    
205 root 1.68 The request body, usually empty. Will be sent as-is (future versions of
206 root 1.3 this module might offer more options).
207    
208 root 1.10 =item cookie_jar => $hash_ref
209    
210     Passing this parameter enables (simplified) cookie-processing, loosely
211     based on the original netscape specification.
212    
213 root 1.80 The C<$hash_ref> must be an (initially empty) hash reference which
214     will get updated automatically. It is possible to save the cookie jar
215     to persistent storage with something like JSON or Storable - see the
216     C<AnyEvent::HTTP::cookie_jar_expire> function if you wish to remove
217     expired or session-only cookies, and also for documentation on the format
218     of the cookie jar.
219 root 1.10
220 root 1.70 Note that this cookie implementation is not meant to be complete. If
221     you want complete cookie management you have to do that on your
222 root 1.80 own. C<cookie_jar> is meant as a quick fix to get most cookie-using sites
223 root 1.70 working. Cookies are a privacy disaster, do not use them unless required
224     to.
225 root 1.10
226 root 1.69 When cookie processing is enabled, the C<Cookie:> and C<Set-Cookie:>
227 root 1.70 headers will be set and handled by this module, otherwise they will be
228 root 1.69 left untouched.
229    
230 root 1.40 =item tls_ctx => $scheme | $tls_ctx
231    
232     Specifies the AnyEvent::TLS context to be used for https connections. This
233     parameter follows the same rules as the C<tls_ctx> parameter to
234     L<AnyEvent::Handle>, but additionally, the two strings C<low> or
235     C<high> can be specified, which give you a predefined low-security (no
236     verification, highest compatibility) and high-security (CA and common-name
237     verification) TLS context.
238    
239     The default for this option is C<low>, which could be interpreted as "give
240     me the page, no matter what".
241    
242 root 1.92 See also the C<sessionid> parameter.
243    
244     =item session => $string
245    
246     The module might reuse connections to the same host internally. Sometimes
247     (e.g. when using TLS), you do not want to reuse connections from other
248     sessions. This can be achieved by setting this parameter to some unique
249     ID (such as the address of an object storing your state data, or the TLS
250     context) - only connections using the same unique ID will be reused.
251    
252 root 1.51 =item on_prepare => $callback->($fh)
253    
254     In rare cases you need to "tune" the socket before it is used to
255 root 1.117 connect (for example, to bind it on a given IP address). This parameter
256 root 1.51 overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect>
257     and behaves exactly the same way (e.g. it has to provide a
258     timeout). See the description for the C<$prepare_cb> argument of
259     C<AnyEvent::Socket::tcp_connect> for details.
260    
261 root 1.59 =item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
262    
263     In even rarer cases you want total control over how AnyEvent::HTTP
264     establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect>
265     to do this, but you can provide your own C<tcp_connect> function -
266 root 1.60 obviously, it has to follow the same calling conventions, except that it
267     may always return a connection guard object.
268 root 1.59
269 root 1.134 The connections made by this hook will be treated as equivalent to
270     connecitons made the built-in way, specifically, they will be put into
271     and taken from the persistent conneciton cache. If your C<$tcp_connect>
272     function is incompatible with this kind of re-use, consider switching off
273     C<persistent> connections and/or providing a C<session> identifier.
274    
275 root 1.59 There are probably lots of weird uses for this function, starting from
276     tracing the hosts C<http_request> actually tries to connect, to (inexact
277     but fast) host => IP address caching or even socks protocol support.
278    
279 root 1.42 =item on_header => $callback->($headers)
280 root 1.41
281     When specified, this callback will be called with the header hash as soon
282     as headers have been successfully received from the remote server (not on
283     locally-generated errors).
284    
285     It has to return either true (in which case AnyEvent::HTTP will continue),
286     or false, in which case AnyEvent::HTTP will cancel the download (and call
287     the finish callback with an error code of C<598>).
288    
289     This callback is useful, among other things, to quickly reject unwanted
290     content, which, if it is supposed to be rare, can be faster than first
291     doing a C<HEAD> request.
292    
293 root 1.68 The downside is that cancelling the request makes it impossible to re-use
294     the connection. Also, the C<on_header> callback will not receive any
295     trailer (headers sent after the response body).
296    
297 root 1.42 Example: cancel the request unless the content-type is "text/html".
298 root 1.41
299 root 1.42 on_header => sub {
300     $_[0]{"content-type"} =~ /^text\/html\s*(?:;|$)/
301     },
302 root 1.41
303 root 1.42 =item on_body => $callback->($partial_body, $headers)
304 root 1.41
305 root 1.42 When specified, all body data will be passed to this callback instead of
306     to the completion callback. The completion callback will get the empty
307     string instead of the body data.
308 root 1.41
309 root 1.42 It has to return either true (in which case AnyEvent::HTTP will continue),
310     or false, in which case AnyEvent::HTTP will cancel the download (and call
311     the completion callback with an error code of C<598>).
312    
313 root 1.68 The downside to cancelling the request is that it makes it impossible to
314     re-use the connection.
315    
316 root 1.42 This callback is useful when the data is too large to be held in memory
317     (so the callback writes it to a file) or when only some information should
318     be extracted, or when the body should be processed incrementally.
319 root 1.41
320     It is usually preferred over doing your own body handling via
321 root 1.45 C<want_body_handle>, but in case of streaming APIs, where HTTP is
322     only used to create a connection, C<want_body_handle> is the better
323     alternative, as it allows you to install your own event handler, reducing
324     resource usage.
325 root 1.41
326     =item want_body_handle => $enable
327    
328     When enabled (default is disabled), the behaviour of AnyEvent::HTTP
329     changes considerably: after parsing the headers, and instead of
330     downloading the body (if any), the completion callback will be
331     called. Instead of the C<$body> argument containing the body data, the
332     callback will receive the L<AnyEvent::Handle> object associated with the
333     connection. In error cases, C<undef> will be passed. When there is no body
334     (e.g. status C<304>), the empty string will be passed.
335    
336 root 1.92 The handle object might or might not be in TLS mode, might be connected
337     to a proxy, be a persistent connection, use chunked transfer encoding
338     etc., and configured in unspecified ways. The user is responsible for this
339     handle (it will not be used by this module anymore).
340 root 1.41
341     This is useful with some push-type services, where, after the initial
342     headers, an interactive protocol is used (typical example would be the
343     push-style twitter API which starts a JSON/XML stream).
344    
345     If you think you need this, first have a look at C<on_body>, to see if
346 root 1.45 that doesn't solve your problem in a better way.
347 root 1.41
348 root 1.92 =item persistent => $boolean
349    
350     Try to create/reuse a persistent connection. When this flag is set
351     (default: true for idempotent requests, false for all others), then
352     C<http_request> tries to re-use an existing (previously-created)
353 root 1.133 persistent connection to same host (i.e. identical URL scheme, hostname,
354     port and session) and, failing that, tries to create a new one.
355 root 1.92
356     Requests failing in certain ways will be automatically retried once, which
357     is dangerous for non-idempotent requests, which is why it defaults to off
358     for them. The reason for this is because the bozos who designed HTTP/1.1
359     made it impossible to distinguish between a fatal error and a normal
360     connection timeout, so you never know whether there was a problem with
361     your request or not.
362    
363     When reusing an existent connection, many parameters (such as TLS context)
364     will be ignored. See the C<session> parameter for a workaround.
365    
366     =item keepalive => $boolean
367    
368     Only used when C<persistent> is also true. This parameter decides whether
369     C<http_request> tries to handshake a HTTP/1.0-style keep-alive connection
370     (as opposed to only a HTTP/1.1 persistent connection).
371    
372     The default is true, except when using a proxy, in which case it defaults
373     to false, as HTTP/1.0 proxies cannot support this in a meaningful way.
374    
375     =item handle_params => { key => value ... }
376    
377     The key-value pairs in this hash will be passed to any L<AnyEvent::Handle>
378     constructor that is called - not all requests will create a handle, and
379     sometimes more than one is created, so this parameter is only good for
380     setting hints.
381    
382     Example: set the maximum read size to 4096, to potentially conserve memory
383     at the cost of speed.
384    
385     handle_params => {
386     max_read_size => 4096,
387     },
388    
389 root 1.1 =back
390    
391 root 1.68 Example: do a simple HTTP GET request for http://www.nethype.de/ and print
392     the response body.
393 root 1.9
394     http_request GET => "http://www.nethype.de/", sub {
395     my ($body, $hdr) = @_;
396     print "$body\n";
397     };
398    
399 root 1.68 Example: do a HTTP HEAD request on https://www.google.com/, use a
400 root 1.9 timeout of 30 seconds.
401    
402     http_request
403 root 1.109 HEAD => "https://www.google.com",
404 root 1.90 headers => { "user-agent" => "MySearchClient 1.0" },
405 root 1.9 timeout => 30,
406     sub {
407     my ($body, $hdr) = @_;
408     use Data::Dumper;
409     print Dumper $hdr;
410     }
411     ;
412 root 1.1
413 root 1.68 Example: do another simple HTTP GET request, but immediately try to
414 root 1.29 cancel it.
415    
416     my $request = http_request GET => "http://www.nethype.de/", sub {
417     my ($body, $hdr) = @_;
418     print "$body\n";
419     };
420    
421     undef $request;
422    
423 root 1.1 =cut
424    
425 root 1.92 #############################################################################
426     # wait queue/slots
427    
428 root 1.12 sub _slot_schedule;
429 root 1.11 sub _slot_schedule($) {
430     my $host = shift;
431    
432     while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
433     if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
434 root 1.12 # somebody wants that slot
435 root 1.11 ++$CO_SLOT{$host}[0];
436 root 1.14 ++$ACTIVE;
437 root 1.11
438     $cb->(AnyEvent::Util::guard {
439 root 1.14 --$ACTIVE;
440 root 1.11 --$CO_SLOT{$host}[0];
441     _slot_schedule $host;
442     });
443     } else {
444     # nobody wants the slot, maybe we can forget about it
445     delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
446     last;
447     }
448     }
449     }
450    
451     # wait for a free slot on host, call callback
452     sub _get_slot($$) {
453     push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
454    
455     _slot_schedule $_[0];
456     }
457    
458 root 1.80 #############################################################################
459 root 1.92 # cookie handling
460 root 1.80
461     # expire cookies
462     sub cookie_jar_expire($;$) {
463     my ($jar, $session_end) = @_;
464    
465 root 1.130 %$jar = () if $jar->{version} != 2;
466 root 1.80
467     my $anow = AE::now;
468    
469     while (my ($chost, $paths) = each %$jar) {
470     next unless ref $paths;
471    
472     while (my ($cpath, $cookies) = each %$paths) {
473     while (my ($cookie, $kv) = each %$cookies) {
474     if (exists $kv->{_expires}) {
475     delete $cookies->{$cookie}
476     if $anow > $kv->{_expires};
477     } elsif ($session_end) {
478     delete $cookies->{$cookie};
479     }
480     }
481    
482     delete $paths->{$cpath}
483     unless %$cookies;
484     }
485    
486     delete $jar->{$chost}
487     unless %$paths;
488     }
489     }
490    
491 root 1.72 # extract cookies from jar
492 root 1.71 sub cookie_jar_extract($$$$) {
493 root 1.92 my ($jar, $scheme, $host, $path) = @_;
494 root 1.71
495 root 1.130 %$jar = () if $jar->{version} != 2;
496    
497     $host = AnyEvent::Util::idn_to_ascii $host
498     if $host =~ /[^\x00-\x7f]/;
499 root 1.71
500     my @cookies;
501    
502     while (my ($chost, $paths) = each %$jar) {
503     next unless ref $paths;
504    
505 root 1.130 # exact match or suffix including . match
506     $chost eq $host or ".$chost" eq substr $host, -1 - length $chost
507     or next;
508 root 1.71
509     while (my ($cpath, $cookies) = each %$paths) {
510 root 1.92 next unless $cpath eq substr $path, 0, length $cpath;
511 root 1.71
512     while (my ($cookie, $kv) = each %$cookies) {
513 root 1.92 next if $scheme ne "https" && exists $kv->{secure};
514 root 1.71
515 root 1.80 if (exists $kv->{_expires} and AE::now > $kv->{_expires}) {
516     delete $cookies->{$cookie};
517     next;
518 root 1.71 }
519    
520     my $value = $kv->{value};
521    
522     if ($value =~ /[=;,[:space:]]/) {
523     $value =~ s/([\\"])/\\$1/g;
524     $value = "\"$value\"";
525     }
526    
527     push @cookies, "$cookie=$value";
528     }
529     }
530     }
531    
532     \@cookies
533     }
534    
535 root 1.72 # parse set_cookie header into jar
536 root 1.80 sub cookie_jar_set_cookie($$$$) {
537 root 1.92 my ($jar, $set_cookie, $host, $date) = @_;
538 root 1.80
539 root 1.130 %$jar = () if $jar->{version} != 2;
540    
541 root 1.80 my $anow = int AE::now;
542     my $snow; # server-now
543 root 1.72
544     for ($set_cookie) {
545     # parse NAME=VALUE
546     my @kv;
547    
548 root 1.79 # expires is not http-compliant in the original cookie-spec,
549     # we support the official date format and some extensions
550 root 1.72 while (
551     m{
552     \G\s*
553     (?:
554 root 1.79 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
555 root 1.101 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^;,[:space:]]*) ) )?
556 root 1.72 )
557     }gcxsi
558     ) {
559     my $name = $2;
560     my $value = $4;
561    
562 root 1.82 if (defined $1) {
563 root 1.72 # expires
564     $name = "expires";
565     $value = $1;
566 root 1.82 } elsif (defined $3) {
567 root 1.72 # quoted
568     $value = $3;
569     $value =~ s/\\(.)/$1/gs;
570     }
571    
572 root 1.95 push @kv, @kv ? lc $name : $name, $value;
573 root 1.72
574     last unless /\G\s*;/gc;
575     }
576    
577     last unless @kv;
578    
579     my $name = shift @kv;
580     my %kv = (value => shift @kv, @kv);
581    
582 root 1.80 if (exists $kv{"max-age"}) {
583     $kv{_expires} = $anow + delete $kv{"max-age"};
584     } elsif (exists $kv{expires}) {
585     $snow ||= parse_date ($date) || $anow;
586     $kv{_expires} = $anow + (parse_date (delete $kv{expires}) - $snow);
587     } else {
588     delete $kv{_expires};
589     }
590 root 1.72
591     my $cdom;
592     my $cpath = (delete $kv{path}) || "/";
593    
594     if (exists $kv{domain}) {
595 root 1.130 $cdom = $kv{domain};
596 root 1.72
597     $cdom =~ s/^\.?/./; # make sure it starts with a "."
598    
599     next if $cdom =~ /\.$/;
600    
601     # this is not rfc-like and not netscape-like. go figure.
602     my $ndots = $cdom =~ y/.//;
603     next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
604 root 1.130
605     $cdom = substr $cdom, 1; # remove initial .
606 root 1.72 } else {
607 root 1.92 $cdom = $host;
608 root 1.72 }
609    
610     # store it
611 root 1.130 $jar->{version} = 2;
612 root 1.83 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
613 root 1.72
614     redo if /\G\s*,/gc;
615     }
616     }
617    
618 root 1.92 #############################################################################
619     # keepalive/persistent connection cache
620    
621     # fetch a connection from the keepalive cache
622     sub ka_fetch($) {
623     my $ka_key = shift;
624    
625     my $hdl = pop @{ $KA_CACHE{$ka_key} }; # currently we reuse the MOST RECENTLY USED connection
626     delete $KA_CACHE{$ka_key}
627     unless @{ $KA_CACHE{$ka_key} };
628    
629     $hdl
630     }
631    
632     sub ka_store($$) {
633     my ($ka_key, $hdl) = @_;
634    
635     my $kaa = $KA_CACHE{$ka_key} ||= [];
636    
637     my $destroy = sub {
638     my @ka = grep $_ != $hdl, @{ $KA_CACHE{$ka_key} };
639    
640     $hdl->destroy;
641    
642     @ka
643     ? $KA_CACHE{$ka_key} = \@ka
644     : delete $KA_CACHE{$ka_key};
645     };
646    
647     # on error etc., destroy
648     $hdl->on_error ($destroy);
649     $hdl->on_eof ($destroy);
650     $hdl->on_read ($destroy);
651     $hdl->timeout ($PERSISTENT_TIMEOUT);
652    
653     push @$kaa, $hdl;
654     shift @$kaa while @$kaa > $MAX_PER_HOST;
655     }
656    
657     #############################################################################
658     # utilities
659    
660 root 1.66 # continue to parse $_ for headers and place them into the arg
661 root 1.92 sub _parse_hdr() {
662 root 1.66 my %hdr;
663    
664     # things seen, not parsed:
665     # p3pP="NON CUR OTPi OUR NOR UNI"
666    
667     $hdr{lc $1} .= ",$2"
668     while /\G
669     ([^:\000-\037]*):
670     [\011\040]*
671     ((?: [^\012]+ | \012[\011\040] )*)
672     \012
673     /gxc;
674    
675     /\G$/
676     or return;
677    
678     # remove the "," prefix we added to all headers above
679     substr $_, 0, 1, ""
680     for values %hdr;
681    
682     \%hdr
683     }
684    
685 root 1.92 #############################################################################
686     # http_get
687    
688 root 1.46 our $qr_nlnl = qr{(?<![^\012])\015?\012};
689 root 1.34
690 root 1.41 our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 };
691     our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
692 root 1.40
693 root 1.92 # maybe it should just become a normal object :/
694    
695     sub _destroy_state(\%) {
696     my ($state) = @_;
697    
698     $state->{handle}->destroy if $state->{handle};
699     %$state = ();
700     }
701    
702     sub _error(\%$$) {
703     my ($state, $cb, $hdr) = @_;
704    
705     &_destroy_state ($state);
706    
707     $cb->(undef, $hdr);
708     ()
709     }
710    
711 root 1.116 our %IDEMPOTENT = (
712     DELETE => 1,
713     GET => 1,
714     HEAD => 1,
715     OPTIONS => 1,
716     PUT => 1,
717     TRACE => 1,
718    
719     ACL => 1,
720     "BASELINE-CONTROL" => 1,
721     BIND => 1,
722     CHECKIN => 1,
723     CHECKOUT => 1,
724     COPY => 1,
725     LABEL => 1,
726     LINK => 1,
727     MERGE => 1,
728     MKACTIVITY => 1,
729     MKCALENDAR => 1,
730     MKCOL => 1,
731     MKREDIRECTREF => 1,
732     MKWORKSPACE => 1,
733     MOVE => 1,
734     ORDERPATCH => 1,
735     PROPFIND => 1,
736     PROPPATCH => 1,
737     REBIND => 1,
738     REPORT => 1,
739     SEARCH => 1,
740     UNBIND => 1,
741     UNCHECKOUT => 1,
742     UNLINK => 1,
743     UNLOCK => 1,
744     UPDATE => 1,
745     UPDATEREDIRECTREF => 1,
746     "VERSION-CONTROL" => 1,
747     );
748    
749 elmex 1.15 sub http_request($$@) {
750 root 1.1 my $cb = pop;
751     my ($method, $url, %arg) = @_;
752    
753     my %hdr;
754    
755 root 1.40 $arg{tls_ctx} = $TLS_CTX_LOW if $arg{tls_ctx} eq "low" || !exists $arg{tls_ctx};
756     $arg{tls_ctx} = $TLS_CTX_HIGH if $arg{tls_ctx} eq "high";
757    
758 root 1.3 $method = uc $method;
759    
760 root 1.8 if (my $hdr = $arg{headers}) {
761 root 1.1 while (my ($k, $v) = each %$hdr) {
762     $hdr{lc $k} = $v;
763     }
764     }
765    
766 root 1.55 # pseudo headers for all subsequent responses
767     my @pseudo = (URL => $url);
768     push @pseudo, Redirect => delete $arg{Redirect} if exists $arg{Redirect};
769    
770 root 1.23 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
771 root 1.8
772 root 1.64 return $cb->(undef, { @pseudo, Status => 599, Reason => "Too many redirections" })
773 root 1.8 if $recurse < 0;
774    
775 root 1.102 my $proxy = exists $arg{proxy} ? $arg{proxy} : $PROXY;
776 root 1.1 my $timeout = $arg{timeout} || $TIMEOUT;
777    
778 root 1.92 my ($uscheme, $uauthority, $upath, $query, undef) = # ignore fragment
779 root 1.100 $url =~ m|^([^:]+):(?://([^/?#]*))?([^?#]*)(?:(\?[^#]*))?(?:#(.*))?$|;
780 root 1.2
781 root 1.31 $uscheme = lc $uscheme;
782 root 1.1
783 root 1.31 my $uport = $uscheme eq "http" ? 80
784     : $uscheme eq "https" ? 443
785 root 1.64 : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" });
786 root 1.13
787 root 1.124 $uauthority =~ /^(?: .*\@ )? ([^\@]+?) (?: : (\d+) )?$/x
788 root 1.64 or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" });
789 root 1.10
790 root 1.86 my $uhost = lc $1;
791 root 1.10 $uport = $2 if defined $2;
792    
793 root 1.53 $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
794     unless exists $hdr{host};
795 root 1.43
796 root 1.10 $uhost =~ s/^\[(.*)\]$/$1/;
797 root 1.56 $upath .= $query if length $query;
798 root 1.10
799     $upath =~ s%^/?%/%;
800    
801     # cookie processing
802     if (my $jar = $arg{cookie_jar}) {
803 root 1.71 my $cookies = cookie_jar_extract $jar, $uscheme, $uhost, $upath;
804 root 1.70
805 root 1.71 $hdr{cookie} = join "; ", @$cookies
806     if @$cookies;
807 root 1.10 }
808 root 1.1
809 root 1.31 my ($rhost, $rport, $rscheme, $rpath); # request host, port, path
810 root 1.2
811 root 1.10 if ($proxy) {
812 root 1.38 ($rpath, $rhost, $rport, $rscheme) = ($url, @$proxy);
813 root 1.31
814 root 1.47 $rscheme = "http" unless defined $rscheme;
815    
816 root 1.31 # don't support https requests over https-proxy transport,
817 root 1.38 # can't be done with tls as spec'ed, unless you double-encrypt.
818 root 1.31 $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https";
819 root 1.86
820     $rhost = lc $rhost;
821     $rscheme = lc $rscheme;
822 root 1.10 } else {
823 root 1.31 ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
824 root 1.2 }
825    
826 root 1.47 # leave out fragment and query string, just a heuristic
827 root 1.66 $hdr{referer} = "$uscheme://$uauthority$upath" unless exists $hdr{referer};
828     $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
829 root 1.41
830 root 1.53 $hdr{"content-length"} = length $arg{body}
831     if length $arg{body} || $method ne "GET";
832 root 1.1
833 root 1.116 my $idempotent = $IDEMPOTENT{$method};
834 root 1.89
835     # default value for keepalive is true iff the request is for an idempotent method
836 root 1.103 my $persistent = exists $arg{persistent} ? !!$arg{persistent} : $idempotent;
837     my $keepalive = exists $arg{keepalive} ? !!$arg{keepalive} : !$proxy;
838     my $was_persistent; # true if this is actually a recycled connection
839 root 1.92
840     # the key to use in the keepalive cache
841 root 1.105 my $ka_key = "$uscheme\x00$uhost\x00$uport\x00$arg{sessionid}";
842 root 1.89
843 root 1.118 $hdr{connection} = ($persistent ? $keepalive ? "keep-alive, " : "" : "close, ") . "Te"; #1.1
844 root 1.68 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
845 root 1.66
846 root 1.11 my %state = (connect_guard => 1);
847    
848 root 1.88 my $ae_error = 595; # connecting
849 root 1.1
850 root 1.88 # handle actual, non-tunneled, request
851     my $handle_actual_request = sub {
852     $ae_error = 596; # request phase
853    
854 root 1.92 my $hdl = $state{handle};
855    
856     $hdl->starttls ("connect") if $uscheme eq "https" && !exists $hdl->{tls};
857 root 1.88
858     # send request
859 root 1.92 $hdl->push_write (
860 root 1.88 "$method $rpath HTTP/1.1\015\012"
861     . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
862     . "\015\012"
863 root 1.120 . $arg{body}
864 root 1.88 );
865    
866 root 1.117 # return if error occurred during push_write()
867 root 1.88 return unless %state;
868    
869 root 1.89 # reduce memory usage, save a kitten, also re-use it for the response headers.
870     %hdr = ();
871 root 1.88
872     # status line and headers
873     $state{read_response} = sub {
874 root 1.92 return unless %state;
875    
876 root 1.88 for ("$_[1]") {
877     y/\015//d; # weed out any \015, as they show up in the weirdest of places.
878    
879     /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
880 root 1.92 or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid server response" };
881 root 1.88
882     # 100 Continue handling
883     # should not happen as we don't send expect: 100-continue,
884     # but we handle it just in case.
885     # since we send the request body regardless, if we get an error
886     # we are out of-sync, which we currently do NOT handle correctly.
887     return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
888     if $2 eq 100;
889    
890     push @pseudo,
891     HTTPVersion => $1,
892     Status => $2,
893     Reason => $3,
894     ;
895 root 1.1
896 root 1.92 my $hdr = _parse_hdr
897     or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Garbled response headers" };
898 root 1.77
899 root 1.88 %hdr = (%$hdr, @pseudo);
900     }
901 root 1.82
902 root 1.88 # redirect handling
903 root 1.119 # relative uri handling forced by microsoft and other shitheads.
904     # we give our best and fall back to URI if available.
905     if (exists $hdr{location}) {
906     my $loc = $hdr{location};
907    
908     if ($loc =~ m%^//%) { # //
909 root 1.126 $loc = "$uscheme:$loc";
910 root 1.119
911     } elsif ($loc eq "") {
912     $loc = $url;
913    
914     } elsif ($loc !~ /^(?: $ | [^:\/?\#]+ : )/x) { # anything "simple"
915     $loc =~ s/^\.\/+//;
916    
917     if ($loc !~ m%^[.?#]%) {
918 root 1.129 my $prefix = "$uscheme://$uauthority";
919 root 1.119
920     unless ($loc =~ s/^\///) {
921     $prefix .= $upath;
922     $prefix =~ s/\/[^\/]*$//;
923     }
924    
925     $loc = "$prefix/$loc";
926    
927     } elsif (eval { require URI }) { # uri
928     $loc = URI->new_abs ($loc, $url)->as_string;
929    
930     } else {
931     return _error %state, $cb, { @pseudo, Status => 599, Reason => "Cannot parse Location (URI module missing)" };
932     #$hdr{Status} = 599;
933     #$hdr{Reason} = "Unparsable Redirect (URI module missing)";
934     #$recurse = 0;
935     }
936 root 1.82 }
937    
938 root 1.119 $hdr{location} = $loc;
939 root 1.88 }
940 root 1.82
941 root 1.88 my $redirect;
942 root 1.82
943 root 1.88 if ($recurse) {
944     my $status = $hdr{Status};
945 root 1.82
946 root 1.88 # industry standard is to redirect POST as GET for
947     # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
948     # also, the UA should ask the user for 301 and 307 and POST,
949     # industry standard seems to be to simply follow.
950 root 1.119 # we go with the industry standard. 308 is defined
951 root 1.121 # by rfc7538
952 root 1.88 if ($status == 301 or $status == 302 or $status == 303) {
953 root 1.122 $redirect = 1;
954 root 1.88 # HTTP/1.1 is unclear on how to mutate the method
955 root 1.122 unless ($method eq "HEAD") {
956     $method = "GET";
957     delete $arg{body};
958     }
959 root 1.119 } elsif ($status == 307 or $status == 308) {
960 root 1.88 $redirect = 1;
961 root 1.82 }
962 root 1.88 }
963 root 1.82
964 root 1.103 my $finish = sub { # ($data, $err_status, $err_reason[, $persistent])
965 root 1.92 if ($state{handle}) {
966     # handle keepalive
967     if (
968 root 1.103 $persistent
969 root 1.92 && $_[3]
970     && ($hdr{HTTPVersion} < 1.1
971     ? $hdr{connection} =~ /\bkeep-?alive\b/i
972     : $hdr{connection} !~ /\bclose\b/i)
973     ) {
974     ka_store $ka_key, delete $state{handle};
975     } else {
976     # no keepalive, destroy the handle
977     $state{handle}->destroy;
978     }
979     }
980 root 1.82
981 root 1.88 %state = ();
982 root 1.82
983 root 1.88 if (defined $_[1]) {
984     $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
985     $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
986     }
987 root 1.82
988 root 1.88 # set-cookie processing
989     if ($arg{cookie_jar}) {
990     cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
991     }
992 root 1.82
993 root 1.88 if ($redirect && exists $hdr{location}) {
994     # we ignore any errors, as it is very common to receive
995     # Content-Length != 0 but no actual body
996     # we also access %hdr, as $_[1] might be an erro
997 root 1.103 $state{recurse} =
998     http_request (
999     $method => $hdr{location},
1000     %arg,
1001     recurse => $recurse - 1,
1002     Redirect => [$_[0], \%hdr],
1003     sub {
1004     %state = ();
1005     &$cb
1006     },
1007     );
1008 root 1.88 } else {
1009     $cb->($_[0], \%hdr);
1010     }
1011     };
1012 root 1.82
1013 root 1.88 $ae_error = 597; # body phase
1014 root 1.82
1015 root 1.91 my $chunked = $hdr{"transfer-encoding"} =~ /\bchunked\b/i; # not quite correct...
1016    
1017     my $len = $chunked ? undef : $hdr{"content-length"};
1018 root 1.82
1019 root 1.88 # body handling, many different code paths
1020     # - no body expected
1021     # - want_body_handle
1022     # - te chunked
1023     # - 2x length known (with or without on_body)
1024     # - 2x length not known (with or without on_body)
1025     if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
1026     $finish->(undef, 598 => "Request cancelled by on_header");
1027     } elsif (
1028     $hdr{Status} =~ /^(?:1..|204|205|304)$/
1029     or $method eq "HEAD"
1030     or (defined $len && $len == 0) # == 0, not !, because "0 " is true
1031     ) {
1032     # no body
1033     $finish->("", undef, undef, 1);
1034    
1035     } elsif (!$redirect && $arg{want_body_handle}) {
1036     $_[0]->on_eof (undef);
1037     $_[0]->on_error (undef);
1038     $_[0]->on_read (undef);
1039    
1040     $finish->(delete $state{handle});
1041    
1042 root 1.91 } elsif ($chunked) {
1043 root 1.88 my $cl = 0;
1044 root 1.92 my $body = "";
1045 root 1.128 my $on_body = (!$redirect && $arg{on_body}) || sub { $body .= shift; 1 };
1046 root 1.88
1047     $state{read_chunk} = sub {
1048     $_[1] =~ /^([0-9a-fA-F]+)/
1049 root 1.108 or return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1050 root 1.82
1051 root 1.88 my $len = hex $1;
1052 root 1.82
1053 root 1.88 if ($len) {
1054     $cl += $len;
1055 root 1.82
1056 root 1.88 $_[0]->push_read (chunk => $len, sub {
1057     $on_body->($_[1], \%hdr)
1058     or return $finish->(undef, 598 => "Request cancelled by on_body");
1059 root 1.82
1060 root 1.88 $_[0]->push_read (line => sub {
1061     length $_[1]
1062     and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
1063     $_[0]->push_read (line => $state{read_chunk});
1064 root 1.82 });
1065 root 1.88 });
1066     } else {
1067     $hdr{"content-length"} ||= $cl;
1068 root 1.84
1069 root 1.88 $_[0]->push_read (line => $qr_nlnl, sub {
1070     if (length $_[1]) {
1071     for ("$_[1]") {
1072     y/\015//d; # weed out any \015, as they show up in the weirdest of places.
1073 root 1.84
1074 root 1.92 my $hdr = _parse_hdr
1075 root 1.88 or return $finish->(undef, $ae_error => "Garbled response trailers");
1076 root 1.84
1077 root 1.88 %hdr = (%hdr, %$hdr);
1078     }
1079     }
1080 root 1.84
1081 root 1.88 $finish->($body, undef, undef, 1);
1082 root 1.84 });
1083     }
1084 root 1.88 };
1085    
1086     $_[0]->push_read (line => $state{read_chunk});
1087    
1088 root 1.128 } elsif (!$redirect && $arg{on_body}) {
1089 root 1.88 if (defined $len) {
1090     $_[0]->on_read (sub {
1091     $len -= length $_[0]{rbuf};
1092    
1093     $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1094     or return $finish->(undef, 598 => "Request cancelled by on_body");
1095    
1096     $len > 0
1097     or $finish->("", undef, undef, 1);
1098     });
1099 root 1.84 } else {
1100 root 1.88 $_[0]->on_eof (sub {
1101     $finish->("");
1102     });
1103     $_[0]->on_read (sub {
1104     $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
1105     or $finish->(undef, 598 => "Request cancelled by on_body");
1106     });
1107     }
1108     } else {
1109     $_[0]->on_eof (undef);
1110 root 1.82
1111 root 1.88 if (defined $len) {
1112     $_[0]->on_read (sub {
1113     $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
1114     if $len <= length $_[0]{rbuf};
1115     });
1116     } else {
1117     $_[0]->on_error (sub {
1118     ($! == Errno::EPIPE || !$!)
1119     ? $finish->(delete $_[0]{rbuf})
1120     : $finish->(undef, $ae_error => $_[2]);
1121     });
1122     $_[0]->on_read (sub { });
1123 root 1.82 }
1124 root 1.88 }
1125     };
1126 root 1.82
1127 root 1.92 # if keepalive is enabled, then the server closing the connection
1128     # before a response can happen legally - we retry on idempotent methods.
1129 root 1.103 if ($was_persistent && $idempotent) {
1130 root 1.92 my $old_eof = $hdl->{on_eof};
1131     $hdl->{on_eof} = sub {
1132     _destroy_state %state;
1133    
1134 root 1.103 %state = ();
1135     $state{recurse} =
1136     http_request (
1137 root 1.124 $method => $url,
1138 root 1.103 %arg,
1139 root 1.124 recurse => $recurse - 1,
1140     persistent => 0,
1141 root 1.103 sub {
1142     %state = ();
1143     &$cb
1144     }
1145     );
1146 root 1.92 };
1147     $hdl->on_read (sub {
1148     return unless %state;
1149    
1150     # as soon as we receive something, a connection close
1151     # once more becomes a hard error
1152     $hdl->{on_eof} = $old_eof;
1153     $hdl->push_read (line => $qr_nlnl, $state{read_response});
1154     });
1155     } else {
1156     $hdl->push_read (line => $qr_nlnl, $state{read_response});
1157     }
1158     };
1159    
1160     my $prepare_handle = sub {
1161     my ($hdl) = $state{handle};
1162    
1163     $hdl->on_error (sub {
1164     _error %state, $cb, { @pseudo, Status => $ae_error, Reason => $_[2] };
1165     });
1166     $hdl->on_eof (sub {
1167     _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" };
1168     });
1169 root 1.103 $hdl->timeout_reset;
1170     $hdl->timeout ($timeout);
1171 root 1.88 };
1172 root 1.82
1173 root 1.92 # connected to proxy (or origin server)
1174 root 1.88 my $connect_cb = sub {
1175 root 1.92 my $fh = shift
1176     or return _error %state, $cb, { @pseudo, Status => $ae_error, Reason => "$!" };
1177 root 1.44
1178 root 1.88 return unless delete $state{connect_guard};
1179 root 1.11
1180 root 1.88 # get handle
1181     $state{handle} = new AnyEvent::Handle
1182 root 1.92 %{ $arg{handle_params} },
1183     fh => $fh,
1184     peername => $uhost,
1185 root 1.88 tls_ctx => $arg{tls_ctx},
1186     ;
1187 root 1.11
1188 root 1.92 $prepare_handle->();
1189 root 1.1
1190 root 1.92 #$state{handle}->starttls ("connect") if $rscheme eq "https";
1191 root 1.88
1192     # now handle proxy-CONNECT method
1193     if ($proxy && $uscheme eq "https") {
1194     # oh dear, we have to wrap it into a connect request
1195    
1196 root 1.122 my $auth = exists $hdr{"proxy-authorization"}
1197     ? "proxy-authorization: " . (delete $hdr{"proxy-authorization"}) . "\015\012"
1198     : "";
1199    
1200 root 1.88 # maybe re-use $uauthority with patched port?
1201 root 1.122 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012$auth\015\012");
1202 root 1.88 $state{handle}->push_read (line => $qr_nlnl, sub {
1203     $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
1204 root 1.92 or return _error %state, $cb, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" };
1205 root 1.88
1206     if ($2 == 200) {
1207     $rpath = $upath;
1208     $handle_actual_request->();
1209     } else {
1210 root 1.92 _error %state, $cb, { @pseudo, Status => $2, Reason => $3 };
1211 root 1.88 }
1212     });
1213     } else {
1214 root 1.122 delete $hdr{"proxy-authorization"} unless $proxy;
1215    
1216 root 1.88 $handle_actual_request->();
1217     }
1218     };
1219    
1220     _get_slot $uhost, sub {
1221     $state{slot_guard} = shift;
1222 root 1.64
1223 root 1.88 return unless $state{connect_guard};
1224 root 1.64
1225 root 1.92 # try to use an existing keepalive connection, but only if we, ourselves, plan
1226     # on a keepalive request (in theory, this should be a separate config option).
1227 root 1.103 if ($persistent && $KA_CACHE{$ka_key}) {
1228     $was_persistent = 1;
1229    
1230 root 1.92 $state{handle} = ka_fetch $ka_key;
1231 root 1.129 # $state{handle}->destroyed
1232     # and die "AnyEvent::HTTP: unexpectedly got a destructed handle (1), please report.";#d#
1233 root 1.92 $prepare_handle->();
1234 root 1.129 # $state{handle}->destroyed
1235     # and die "AnyEvent::HTTP: unexpectedly got a destructed handle (2), please report.";#d#
1236 root 1.92 $handle_actual_request->();
1237    
1238     } else {
1239     my $tcp_connect = $arg{tcp_connect}
1240     || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
1241 root 1.57
1242 root 1.92 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
1243     }
1244 root 1.1 };
1245    
1246 root 1.92 defined wantarray && AnyEvent::Util::guard { _destroy_state %state }
1247 root 1.1 }
1248    
1249 elmex 1.15 sub http_get($@) {
1250 root 1.1 unshift @_, "GET";
1251     &http_request
1252     }
1253    
1254 elmex 1.15 sub http_head($@) {
1255 root 1.4 unshift @_, "HEAD";
1256     &http_request
1257     }
1258    
1259 elmex 1.15 sub http_post($$@) {
1260 root 1.22 my $url = shift;
1261     unshift @_, "POST", $url, "body";
1262 root 1.3 &http_request
1263     }
1264    
1265 root 1.9 =back
1266    
1267 root 1.55 =head2 DNS CACHING
1268    
1269     AnyEvent::HTTP uses the AnyEvent::Socket::tcp_connect function for
1270     the actual connection, which in turn uses AnyEvent::DNS to resolve
1271     hostnames. The latter is a simple stub resolver and does no caching
1272     on its own. If you want DNS caching, you currently have to provide
1273     your own default resolver (by storing a suitable resolver object in
1274 root 1.92 C<$AnyEvent::DNS::RESOLVER>) or your own C<tcp_connect> callback.
1275 root 1.55
1276 root 1.2 =head2 GLOBAL FUNCTIONS AND VARIABLES
1277 root 1.1
1278     =over 4
1279    
1280 root 1.2 =item AnyEvent::HTTP::set_proxy "proxy-url"
1281    
1282     Sets the default proxy server to use. The proxy-url must begin with a
1283 root 1.92 string of the form C<http://host:port>, croaks otherwise.
1284 root 1.52
1285     To clear an already-set proxy, use C<undef>.
1286 root 1.2
1287 root 1.113 When AnyEvent::HTTP is loaded for the first time it will query the
1288 root 1.102 default proxy from the operating system, currently by looking at
1289     C<$ENV{http_proxy>}.
1290    
1291 root 1.80 =item AnyEvent::HTTP::cookie_jar_expire $jar[, $session_end]
1292    
1293     Remove all cookies from the cookie jar that have been expired. If
1294     C<$session_end> is given and true, then additionally remove all session
1295     cookies.
1296    
1297     You should call this function (with a true C<$session_end>) before you
1298     save cookies to disk, and you should call this function after loading them
1299 root 1.117 again. If you have a long-running program you can additionally call this
1300 root 1.80 function from time to time.
1301    
1302     A cookie jar is initially an empty hash-reference that is managed by this
1303 root 1.121 module. Its format is subject to change, but currently it is as follows:
1304 root 1.80
1305 root 1.131 The key C<version> has to contain C<2>, otherwise the hash gets
1306     cleared. All other keys are hostnames or IP addresses pointing to
1307 root 1.80 hash-references. The key for these inner hash references is the
1308     server path for which this cookie is meant, and the values are again
1309 root 1.115 hash-references. Each key of those hash-references is a cookie name, and
1310 root 1.80 the value, you guessed it, is another hash-reference, this time with the
1311     key-value pairs from the cookie, except for C<expires> and C<max-age>,
1312     which have been replaced by a C<_expires> key that contains the cookie
1313 root 1.115 expiry timestamp. Session cookies are indicated by not having an
1314     C<_expires> key.
1315 root 1.80
1316     Here is an example of a cookie jar with a single cookie, so you have a
1317     chance of understanding the above paragraph:
1318    
1319     {
1320 root 1.132 version => 2,
1321 root 1.80 "10.0.0.1" => {
1322     "/" => {
1323     "mythweb_id" => {
1324     _expires => 1293917923,
1325     value => "ooRung9dThee3ooyXooM1Ohm",
1326     },
1327     },
1328     },
1329     }
1330    
1331 root 1.61 =item $date = AnyEvent::HTTP::format_date $timestamp
1332    
1333     Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
1334     Date (RFC 2616).
1335    
1336     =item $timestamp = AnyEvent::HTTP::parse_date $date
1337    
1338 root 1.79 Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
1339     bunch of minor variations of those, and returns the corresponding POSIX
1340     timestamp, or C<undef> if the date cannot be parsed.
1341 root 1.61
1342 root 1.3 =item $AnyEvent::HTTP::MAX_RECURSE
1343 root 1.1
1344 root 1.3 The default value for the C<recurse> request parameter (default: C<10>).
1345 root 1.1
1346 root 1.92 =item $AnyEvent::HTTP::TIMEOUT
1347    
1348 root 1.110 The default timeout for connection operations (default: C<300>).
1349 root 1.92
1350 root 1.1 =item $AnyEvent::HTTP::USERAGENT
1351    
1352     The default value for the C<User-Agent> header (the default is
1353 root 1.40 C<Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
1354 root 1.1
1355 root 1.43 =item $AnyEvent::HTTP::MAX_PER_HOST
1356 root 1.1
1357 root 1.47 The maximum number of concurrent connections to the same host (identified
1358 root 1.121 by the hostname). If the limit is exceeded, then additional requests
1359 root 1.92 are queued until previous connections are closed. Both persistent and
1360     non-persistent connections are counted in this limit.
1361 root 1.1
1362 root 1.43 The default value for this is C<4>, and it is highly advisable to not
1363 root 1.92 increase it much.
1364    
1365     For comparison: the RFC's recommend 4 non-persistent or 2 persistent
1366 root 1.117 connections, older browsers used 2, newer ones (such as firefox 3)
1367     typically use 6, and Opera uses 8 because like, they have the fastest
1368     browser and give a shit for everybody else on the planet.
1369 root 1.92
1370     =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
1371    
1372 root 1.116 The time after which idle persistent connections get closed by
1373 root 1.92 AnyEvent::HTTP (default: C<3>).
1374 root 1.3
1375 root 1.14 =item $AnyEvent::HTTP::ACTIVE
1376    
1377     The number of active connections. This is not the number of currently
1378     running requests, but the number of currently open and non-idle TCP
1379 root 1.92 connections. This number can be useful for load-leveling.
1380 root 1.14
1381 root 1.1 =back
1382    
1383     =cut
1384    
1385 root 1.61 our @month = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
1386     our @weekday = qw(Sun Mon Tue Wed Thu Fri Sat);
1387    
1388     sub format_date($) {
1389     my ($time) = @_;
1390    
1391     # RFC 822/1123 format
1392     my ($S, $M, $H, $mday, $mon, $year, $wday, $yday, undef) = gmtime $time;
1393    
1394     sprintf "%s, %02d %s %04d %02d:%02d:%02d GMT",
1395     $weekday[$wday], $mday, $month[$mon], $year + 1900,
1396     $H, $M, $S;
1397     }
1398    
1399     sub parse_date($) {
1400     my ($date) = @_;
1401    
1402     my ($d, $m, $y, $H, $M, $S);
1403    
1404 root 1.79 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1405 root 1.70 # RFC 822/1123, required by RFC 2616 (with " ")
1406     # cookie dates (with "-")
1407    
1408 root 1.61 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1409    
1410 root 1.79 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1411 root 1.61 # RFC 850
1412     ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1413    
1414 root 1.79 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1415 root 1.61 # ISO C's asctime
1416     ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1417     }
1418     # other formats fail in the loop below
1419    
1420     for (0..11) {
1421     if ($m eq $month[$_]) {
1422     require Time::Local;
1423 root 1.111 return eval { Time::Local::timegm ($S, $M, $H, $d, $_, $y) };
1424 root 1.61 }
1425     }
1426    
1427     undef
1428     }
1429    
1430 root 1.2 sub set_proxy($) {
1431 root 1.52 if (length $_[0]) {
1432 root 1.92 $_[0] =~ m%^(http):// ([^:/]+) (?: : (\d*) )?%ix
1433 root 1.52 or Carp::croak "$_[0]: invalid proxy URL";
1434     $PROXY = [$2, $3 || 3128, $1]
1435     } else {
1436     undef $PROXY;
1437     }
1438 root 1.2 }
1439    
1440     # initialise proxy from environment
1441 root 1.52 eval {
1442     set_proxy $ENV{http_proxy};
1443     };
1444 root 1.2
1445 root 1.93 =head2 SHOWCASE
1446    
1447 root 1.117 This section contains some more elaborate "real-world" examples or code
1448 root 1.93 snippets.
1449    
1450     =head2 HTTP/1.1 FILE DOWNLOAD
1451    
1452 root 1.96 Downloading files with HTTP can be quite tricky, especially when something
1453 root 1.98 goes wrong and you want to resume.
1454 root 1.93
1455     Here is a function that initiates and resumes a download. It uses the
1456     last modified time to check for file content changes, and works with many
1457     HTTP/1.0 servers as well, and usually falls back to a complete re-download
1458     on older servers.
1459    
1460     It calls the completion callback with either C<undef>, which means a
1461 root 1.117 nonretryable error occurred, C<0> when the download was partial and should
1462 root 1.93 be retried, and C<1> if it was successful.
1463    
1464     use AnyEvent::HTTP;
1465    
1466     sub download($$$) {
1467     my ($url, $file, $cb) = @_;
1468    
1469     open my $fh, "+<", $file
1470     or die "$file: $!";
1471    
1472     my %hdr;
1473     my $ofs = 0;
1474    
1475     if (stat $fh and -s _) {
1476     $ofs = -s _;
1477 root 1.107 warn "-s is ", $ofs;
1478 root 1.93 $hdr{"if-unmodified-since"} = AnyEvent::HTTP::format_date +(stat _)[9];
1479     $hdr{"range"} = "bytes=$ofs-";
1480     }
1481    
1482     http_get $url,
1483     headers => \%hdr,
1484     on_header => sub {
1485     my ($hdr) = @_;
1486    
1487     if ($hdr->{Status} == 200 && $ofs) {
1488     # resume failed
1489     truncate $fh, $ofs = 0;
1490     }
1491    
1492     sysseek $fh, $ofs, 0;
1493    
1494     1
1495     },
1496     on_body => sub {
1497     my ($data, $hdr) = @_;
1498    
1499     if ($hdr->{Status} =~ /^2/) {
1500     length $data == syswrite $fh, $data
1501     or return; # abort on write errors
1502     }
1503    
1504     1
1505     },
1506     sub {
1507     my (undef, $hdr) = @_;
1508    
1509     my $status = $hdr->{Status};
1510    
1511     if (my $time = AnyEvent::HTTP::parse_date $hdr->{"last-modified"}) {
1512 root 1.127 utime $time, $time, $fh;
1513 root 1.93 }
1514    
1515     if ($status == 200 || $status == 206 || $status == 416) {
1516     # download ok || resume ok || file already fully downloaded
1517     $cb->(1, $hdr);
1518    
1519     } elsif ($status == 412) {
1520     # file has changed while resuming, delete and retry
1521     unlink $file;
1522     $cb->(0, $hdr);
1523    
1524     } elsif ($status == 500 or $status == 503 or $status =~ /^59/) {
1525     # retry later
1526     $cb->(0, $hdr);
1527    
1528     } else {
1529     $cb->(undef, $hdr);
1530     }
1531     }
1532     ;
1533     }
1534    
1535     download "http://server/somelargefile", "/tmp/somelargefile", sub {
1536     if ($_[0]) {
1537     print "OK!\n";
1538     } elsif (defined $_[0]) {
1539     print "please retry later\n";
1540     } else {
1541     print "ERROR\n";
1542     }
1543     };
1544    
1545     =head3 SOCKS PROXIES
1546 root 1.60
1547     Socks proxies are not directly supported by AnyEvent::HTTP. You can
1548     compile your perl to support socks, or use an external program such as
1549     F<socksify> (dante) or F<tsocks> to make your program use a socks proxy
1550     transparently.
1551    
1552     Alternatively, for AnyEvent::HTTP only, you can use your own
1553     C<tcp_connect> function that does the proxy handshake - here is an example
1554     that works with socks4a proxies:
1555    
1556     use Errno;
1557     use AnyEvent::Util;
1558     use AnyEvent::Socket;
1559     use AnyEvent::Handle;
1560    
1561     # host, port and username of/for your socks4a proxy
1562     my $socks_host = "10.0.0.23";
1563     my $socks_port = 9050;
1564     my $socks_user = "";
1565    
1566     sub socks4a_connect {
1567     my ($host, $port, $connect_cb, $prepare_cb) = @_;
1568    
1569     my $hdl = new AnyEvent::Handle
1570     connect => [$socks_host, $socks_port],
1571     on_prepare => sub { $prepare_cb->($_[0]{fh}) },
1572     on_error => sub { $connect_cb->() },
1573     ;
1574    
1575     $hdl->push_write (pack "CCnNZ*Z*", 4, 1, $port, 1, $socks_user, $host);
1576    
1577     $hdl->push_read (chunk => 8, sub {
1578     my ($hdl, $chunk) = @_;
1579     my ($status, $port, $ipn) = unpack "xCna4", $chunk;
1580    
1581     if ($status == 0x5a) {
1582     $connect_cb->($hdl->{fh}, (format_address $ipn) . ":$port");
1583     } else {
1584     $! = Errno::ENXIO; $connect_cb->();
1585     }
1586     });
1587    
1588     $hdl
1589     }
1590    
1591     Use C<socks4a_connect> instead of C<tcp_connect> when doing C<http_request>s,
1592     possibly after switching off other proxy types:
1593    
1594     AnyEvent::HTTP::set_proxy undef; # usually you do not want other proxies
1595    
1596     http_get 'http://www.google.com', tcp_connect => \&socks4a_connect, sub {
1597     my ($data, $headers) = @_;
1598     ...
1599     };
1600    
1601 root 1.1 =head1 SEE ALSO
1602    
1603     L<AnyEvent>.
1604    
1605     =head1 AUTHOR
1606    
1607 root 1.18 Marc Lehmann <schmorp@schmorp.de>
1608     http://home.schmorp.de/
1609 root 1.1
1610 root 1.36 With many thanks to Дмитрий Шалашов, who provided countless
1611     testcases and bugreports.
1612    
1613 root 1.1 =cut
1614    
1615     1
1616