ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.10
Committed: Thu Jun 5 13:06:43 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Changes since 1.9: +102 -36 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4    
5     =head1 SYNOPSIS
6    
7     use AnyEvent::HTTP;
8    
9     =head1 DESCRIPTION
10    
11     This module is an L<AnyEvent> user, you need to make sure that you use and
12     run a supported event loop.
13    
14     =head2 METHODS
15    
16     =over 4
17    
18     =cut
19    
20     package AnyEvent::HTTP;
21    
22     use strict;
23     no warnings;
24    
25     use Carp;
26    
27     use AnyEvent ();
28     use AnyEvent::Util ();
29     use AnyEvent::Socket ();
30     use AnyEvent::Handle ();
31    
32     use base Exporter::;
33    
34     our $VERSION = '1.0';
35    
36     our @EXPORT = qw(http_get http_request);
37    
38     our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39 root 1.3 our $MAX_RECURSE = 10;
40 root 1.2 our $MAX_PERSISTENT = 8;
41     our $PERSISTENT_TIMEOUT = 2;
42     our $TIMEOUT = 300;
43 root 1.1
44     # changing these is evil
45     our $MAX_PERSISTENT_PER_HOST = 2;
46     our $MAX_PER_HOST = 4; # not respected yet :(
47    
48 root 1.2 our $PROXY;
49    
50 root 1.1 my %KA_COUNT; # number of open keep-alive connections per host
51    
52     =item http_get $url, key => value..., $cb->($data, $headers)
53    
54     Executes an HTTP-GET request. See the http_request function for details on
55     additional parameters.
56    
57 root 1.5 =item http_head $url, key => value..., $cb->($data, $headers)
58    
59     Executes an HTTP-HEAD request. See the http_request function for details on
60     additional parameters.
61    
62     =item http_post $url, $body, key => value..., $cb->($data, $headers)
63 root 1.3
64 root 1.7 Executes an HTTP-POST request with a request body of C<$bod>. See the
65 root 1.3 http_request function for details on additional parameters.
66    
67 root 1.1 =item http_request $method => $url, key => value..., $cb->($data, $headers)
68    
69     Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
70     must be an absolute http or https URL.
71    
72 root 1.2 The callback will be called with the response data as first argument
73     (or C<undef> if it wasn't available due to errors), and a hash-ref with
74     response headers as second argument.
75    
76 root 1.7 All the headers in that hash are lowercased. In addition to the response
77 root 1.2 headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78     C<Reason> contain the three parts of the HTTP Status-Line of the same
79 root 1.10 name. If the server sends a header multiple lines, then their contents
80     will be joined together with C<\x00>.
81 root 1.2
82     If an internal error occurs, such as not being able to resolve a hostname,
83     then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
84     and the C<Reason> pseudo-header will contain an error message.
85    
86 root 1.6 A typical callback might look like this:
87    
88     sub {
89     my ($body, $hdr) = @_;
90    
91     if ($hdr->{Status} =~ /^2/) {
92     ... everything should be ok
93     } else {
94     print "error, $hdr->{Status} $hdr->{Reason}\n";
95     }
96     }
97    
98 root 1.1 Additional parameters are key-value pairs, and are fully optional. They
99     include:
100    
101     =over 4
102    
103 root 1.3 =item recurse => $count (default: $MAX_RECURSE)
104 root 1.1
105     Whether to recurse requests or not, e.g. on redirects, authentication
106 root 1.3 retries and so on, and how often to do so.
107 root 1.1
108     =item headers => hashref
109    
110     The request headers to use.
111    
112     =item timeout => $seconds
113    
114     The time-out to use for various stages - each connect attempt will reset
115 root 1.2 the timeout, as will read or write activity. Default timeout is 5 minutes.
116    
117     =item proxy => [$host, $port[, $scheme]] or undef
118    
119     Use the given http proxy for all requests. If not specified, then the
120     default proxy (as specified by C<$ENV{http_proxy}>) is used.
121    
122     C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
123     HTTPS.
124 root 1.1
125 root 1.3 =item body => $string
126    
127     The request body, usually empty. Will be-sent as-is (future versions of
128     this module might offer more options).
129    
130 root 1.10 =item cookie_jar => $hash_ref
131    
132     Passing this parameter enables (simplified) cookie-processing, loosely
133     based on the original netscape specification.
134    
135     The C<$hash_ref> must be an (initially empty) hash reference which will
136     get updated automatically. It is possible to save the cookie_jar to
137     persistent storage with something like JSON or Storable, but this is not
138     recommended, as expire times are currently being ignored.
139    
140     Note that this cookie implementation is not of very high quality, nor
141     meant to be complete. If you want complete cookie management you have to
142     do that on your own. C<cookie_jar> is meant as a quick fix to get some
143     cookie-using sites working. Cookies are a privacy disaster, do not use
144     them unless required to.
145    
146 root 1.1 =back
147    
148 root 1.9 Example: make a simple HTTP GET request for http://www.nethype.de/
149    
150     http_request GET => "http://www.nethype.de/", sub {
151     my ($body, $hdr) = @_;
152     print "$body\n";
153     };
154    
155     Example: make a HTTP HEAD request on https://www.google.com/, use a
156     timeout of 30 seconds.
157    
158     http_request
159     GET => "https://www.google.com",
160     timeout => 30,
161     sub {
162     my ($body, $hdr) = @_;
163     use Data::Dumper;
164     print Dumper $hdr;
165     }
166     ;
167 root 1.1
168     =cut
169    
170     sub http_request($$$;@) {
171     my $cb = pop;
172     my ($method, $url, %arg) = @_;
173    
174     my %hdr;
175    
176 root 1.3 $method = uc $method;
177    
178 root 1.8 if (my $hdr = $arg{headers}) {
179 root 1.1 while (my ($k, $v) = each %$hdr) {
180     $hdr{lc $k} = $v;
181     }
182     }
183    
184 root 1.8 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
185    
186     return $cb->(undef, { Status => 599, Reason => "recursion limit reached" })
187     if $recurse < 0;
188    
189 root 1.2 my $proxy = $arg{proxy} || $PROXY;
190 root 1.1 my $timeout = $arg{timeout} || $TIMEOUT;
191    
192     $hdr{"user-agent"} ||= $USERAGENT;
193    
194 root 1.10 my ($scheme, $authority, $upath, $query, $fragment) =
195     $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
196 root 1.2
197 root 1.10 $scheme = lc $scheme;
198 root 1.1
199 root 1.10 my $uport = $scheme eq "http" ? 80
200     : $scheme eq "https" ? 443
201     : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" });
202    
203     $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
204     or return $cb->(undef, { Status => 599, Reason => "unparsable URL" });
205    
206     my $uhost = $1;
207     $uport = $2 if defined $2;
208    
209     $uhost =~ s/^\[(.*)\]$/$1/;
210     $upath .= "?$query" if length $query;
211    
212     $upath =~ s%^/?%/%;
213    
214     # cookie processing
215     if (my $jar = $arg{cookie_jar}) {
216     %$jar = () if $jar->{version} < 1;
217    
218     my @cookie;
219    
220     while (my ($chost, $v) = each %$jar) {
221     next unless $chost eq substr $uhost, -length $chost;
222     next unless $chost =~ /^\./;
223    
224     while (my ($cpath, $v) = each %$v) {
225     next unless $cpath eq substr $upath, 0, length $cpath;
226    
227     while (my ($k, $v) = each %$v) {
228     next if $scheme ne "https" && exists $v->{secure};
229     push @cookie, "$k=$v->{value}";
230     }
231     }
232     }
233    
234     $hdr{cookie} = join "; ", @cookie
235     if @cookie;
236     }
237 root 1.1
238 root 1.10 my ($rhost, $rport, $rpath); # request host, port, path
239 root 1.2
240 root 1.10 if ($proxy) {
241     ($rhost, $rport, $scheme) = @$proxy;
242     $rpath = $url;
243     } else {
244     ($rhost, $rport, $rpath) = ($uhost, $uport, $upath);
245     $hdr{host} = $uhost;
246 root 1.2 }
247    
248 root 1.10 $hdr{"content-length"} = length $arg{body};
249 root 1.1
250     my %state;
251    
252 root 1.10 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
253 root 1.1 $state{fh} = shift
254     or return $cb->(undef, { Status => 599, Reason => "$!" });
255    
256     delete $state{connect_guard}; # reduce memory usage, save a tree
257    
258     # get handle
259     $state{handle} = new AnyEvent::Handle
260     fh => $state{fh},
261     ($scheme eq "https" ? (tls => "connect") : ());
262    
263     # limit the number of persistent connections
264     if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
265     ++$KA_COUNT{$_[1]};
266     $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
267     $hdr{connection} = "keep-alive";
268 root 1.2 delete $hdr{connection}; # keep-alive not yet supported
269 root 1.1 } else {
270     delete $hdr{connection};
271     }
272    
273     # (re-)configure handle
274     $state{handle}->timeout ($timeout);
275     $state{handle}->on_error (sub {
276     %state = ();
277     $cb->(undef, { Status => 599, Reason => "$!" });
278     });
279     $state{handle}->on_eof (sub {
280     %state = ();
281     $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
282     });
283    
284     # send request
285     $state{handle}->push_write (
286 root 1.10 "$method $rpath HTTP/1.0\015\012"
287 root 1.1 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
288     . "\015\012"
289 root 1.8 . (delete $arg{body})
290 root 1.1 );
291    
292     %hdr = (); # reduce memory usage, save a kitten
293    
294     # status line
295     $state{handle}->push_read (line => qr/\015?\012/, sub {
296     $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
297     or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
298    
299     my %hdr = ( # response headers
300 root 1.10 HTTPVersion => "\x00$1",
301     Status => "\x00$2",
302     Reason => "\x00$3",
303 root 1.1 );
304    
305     # headers, could be optimized a bit
306     $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
307     for ("$_[1]\012") {
308 root 1.2 # we support spaces in field names, as lotus domino
309     # creates them.
310 root 1.10 $hdr{lc $1} .= "\x00$2"
311 root 1.1 while /\G
312 root 1.2 ([^:\000-\037]+):
313 root 1.1 [\011\040]*
314     ((?: [^\015\012]+ | \015?\012[\011\040] )*)
315     \015?\012
316     /gxc;
317    
318     /\G$/
319 root 1.10 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" }));
320 root 1.1 }
321    
322     substr $_, 0, 1, ""
323     for values %hdr;
324    
325 root 1.8 my $finish = sub {
326 root 1.10 %state = ();
327    
328     # set-cookie processing
329     if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) {
330     for (split /\x00/, $hdr{"set-cookie"}) {
331     my ($cookie, @arg) = split /;\s*/;
332     my ($name, $value) = split /=/, $cookie, 2;
333     my %kv = (value => $value, map { split /=/, $_, 2 } @arg);
334    
335     my $cdom = (delete $kv{domain}) || $uhost;
336     my $cpath = (delete $kv{path}) || "/";
337    
338     $cdom =~ s/^.?/./; # make sure it starts with a "."
339    
340     my $ndots = $cdom =~ y/.//;
341     next if $ndots < ($cdom =~ /[^.]{3}$/ ? 2 : 3);
342    
343     # store it
344     $arg{cookie_jar}{version} = 1;
345     $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
346     }
347     }
348    
349     if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) {
350     # microsoft and other assholes don't give a shit for following standards,
351     # try to support a common form of broken Location header.
352     $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
353    
354 root 1.8 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
355     } else {
356     $cb->($_[0], $_[1]);
357     }
358     };
359    
360     if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
361     $finish->(undef, \%hdr);
362 root 1.4 } else {
363 root 1.3 if (exists $hdr{"content-length"}) {
364     $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
365     # could cache persistent connection now
366     if ($hdr{connection} =~ /\bkeep-alive\b/i) {
367     # but we don't, due to misdesigns, this is annoyingly complex
368     };
369    
370 root 1.8 $finish->($_[1], \%hdr);
371 root 1.3 });
372     } else {
373     # too bad, need to read until we get an error or EOF,
374     # no way to detect winged data.
375     $_[0]->on_error (sub {
376 root 1.8 $finish->($_[0]{rbuf}, \%hdr);
377 root 1.3 });
378     $_[0]->on_eof (undef);
379     $_[0]->on_read (sub { });
380     }
381 root 1.1 }
382     });
383     });
384     }, sub {
385     $timeout
386     };
387    
388     defined wantarray && AnyEvent::Util::guard { %state = () }
389     }
390    
391     sub http_get($$;@) {
392     unshift @_, "GET";
393     &http_request
394     }
395    
396 root 1.4 sub http_head($$;@) {
397     unshift @_, "HEAD";
398     &http_request
399     }
400    
401 root 1.3 sub http_post($$$;@) {
402     unshift @_, "POST", "body";
403     &http_request
404     }
405    
406 root 1.9 =back
407    
408 root 1.2 =head2 GLOBAL FUNCTIONS AND VARIABLES
409 root 1.1
410     =over 4
411    
412 root 1.2 =item AnyEvent::HTTP::set_proxy "proxy-url"
413    
414     Sets the default proxy server to use. The proxy-url must begin with a
415     string of the form C<http://host:port> (optionally C<https:...>).
416    
417 root 1.3 =item $AnyEvent::HTTP::MAX_RECURSE
418 root 1.1
419 root 1.3 The default value for the C<recurse> request parameter (default: C<10>).
420 root 1.1
421     =item $AnyEvent::HTTP::USERAGENT
422    
423     The default value for the C<User-Agent> header (the default is
424     C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
425    
426     =item $AnyEvent::HTTP::MAX_PERSISTENT
427    
428     The maximum number of persistent connections to keep open (default: 8).
429    
430 root 1.3 Not implemented currently.
431    
432 root 1.1 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
433    
434 root 1.2 The maximum time to cache a persistent connection, in seconds (default: 2).
435 root 1.1
436 root 1.3 Not implemented currently.
437    
438 root 1.1 =back
439    
440     =cut
441    
442 root 1.2 sub set_proxy($) {
443     $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
444     }
445    
446     # initialise proxy from environment
447     set_proxy $ENV{http_proxy};
448    
449 root 1.1 =head1 SEE ALSO
450    
451     L<AnyEvent>.
452    
453     =head1 AUTHOR
454    
455     Marc Lehmann <schmorp@schmorp.de>
456     http://home.schmorp.de/
457    
458     =cut
459    
460     1
461