ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.9
Committed: Wed Jun 4 13:51:53 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Changes since 1.8: +21 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4    
5     =head1 SYNOPSIS
6    
7     use AnyEvent::HTTP;
8    
9     =head1 DESCRIPTION
10    
11     This module is an L<AnyEvent> user, you need to make sure that you use and
12     run a supported event loop.
13    
14     =head2 METHODS
15    
16     =over 4
17    
18     =cut
19    
20     package AnyEvent::HTTP;
21    
22     use strict;
23     no warnings;
24    
25     use Carp;
26    
27     use AnyEvent ();
28     use AnyEvent::Util ();
29     use AnyEvent::Socket ();
30     use AnyEvent::Handle ();
31    
32     use base Exporter::;
33    
34     our $VERSION = '1.0';
35    
36     our @EXPORT = qw(http_get http_request);
37    
38     our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39 root 1.3 our $MAX_RECURSE = 10;
40 root 1.2 our $MAX_PERSISTENT = 8;
41     our $PERSISTENT_TIMEOUT = 2;
42     our $TIMEOUT = 300;
43 root 1.1
44     # changing these is evil
45     our $MAX_PERSISTENT_PER_HOST = 2;
46     our $MAX_PER_HOST = 4; # not respected yet :(
47    
48 root 1.2 our $PROXY;
49    
50 root 1.1 my %KA_COUNT; # number of open keep-alive connections per host
51    
52     =item http_get $url, key => value..., $cb->($data, $headers)
53    
54     Executes an HTTP-GET request. See the http_request function for details on
55     additional parameters.
56    
57 root 1.5 =item http_head $url, key => value..., $cb->($data, $headers)
58    
59     Executes an HTTP-HEAD request. See the http_request function for details on
60     additional parameters.
61    
62     =item http_post $url, $body, key => value..., $cb->($data, $headers)
63 root 1.3
64 root 1.7 Executes an HTTP-POST request with a request body of C<$bod>. See the
65 root 1.3 http_request function for details on additional parameters.
66    
67 root 1.1 =item http_request $method => $url, key => value..., $cb->($data, $headers)
68    
69     Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
70     must be an absolute http or https URL.
71    
72 root 1.2 The callback will be called with the response data as first argument
73     (or C<undef> if it wasn't available due to errors), and a hash-ref with
74     response headers as second argument.
75    
76 root 1.7 All the headers in that hash are lowercased. In addition to the response
77 root 1.2 headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78     C<Reason> contain the three parts of the HTTP Status-Line of the same
79     name.
80    
81     If an internal error occurs, such as not being able to resolve a hostname,
82     then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83     and the C<Reason> pseudo-header will contain an error message.
84    
85 root 1.6 A typical callback might look like this:
86    
87     sub {
88     my ($body, $hdr) = @_;
89    
90     if ($hdr->{Status} =~ /^2/) {
91     ... everything should be ok
92     } else {
93     print "error, $hdr->{Status} $hdr->{Reason}\n";
94     }
95     }
96    
97 root 1.1 Additional parameters are key-value pairs, and are fully optional. They
98     include:
99    
100     =over 4
101    
102 root 1.3 =item recurse => $count (default: $MAX_RECURSE)
103 root 1.1
104     Whether to recurse requests or not, e.g. on redirects, authentication
105 root 1.3 retries and so on, and how often to do so.
106 root 1.1
107     =item headers => hashref
108    
109     The request headers to use.
110    
111     =item timeout => $seconds
112    
113     The time-out to use for various stages - each connect attempt will reset
114 root 1.2 the timeout, as will read or write activity. Default timeout is 5 minutes.
115    
116     =item proxy => [$host, $port[, $scheme]] or undef
117    
118     Use the given http proxy for all requests. If not specified, then the
119     default proxy (as specified by C<$ENV{http_proxy}>) is used.
120    
121     C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
122     HTTPS.
123 root 1.1
124 root 1.3 =item body => $string
125    
126     The request body, usually empty. Will be-sent as-is (future versions of
127     this module might offer more options).
128    
129 root 1.1 =back
130    
131 root 1.9 Example: make a simple HTTP GET request for http://www.nethype.de/
132    
133     http_request GET => "http://www.nethype.de/", sub {
134     my ($body, $hdr) = @_;
135     print "$body\n";
136     };
137    
138     Example: make a HTTP HEAD request on https://www.google.com/, use a
139     timeout of 30 seconds.
140    
141     http_request
142     GET => "https://www.google.com",
143     timeout => 30,
144     sub {
145     my ($body, $hdr) = @_;
146     use Data::Dumper;
147     print Dumper $hdr;
148     }
149     ;
150 root 1.1
151     =cut
152    
153     sub http_request($$$;@) {
154     my $cb = pop;
155     my ($method, $url, %arg) = @_;
156    
157     my %hdr;
158    
159 root 1.3 $method = uc $method;
160    
161 root 1.8 if (my $hdr = $arg{headers}) {
162 root 1.1 while (my ($k, $v) = each %$hdr) {
163     $hdr{lc $k} = $v;
164     }
165     }
166    
167 root 1.8 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
168    
169     return $cb->(undef, { Status => 599, Reason => "recursion limit reached" })
170     if $recurse < 0;
171    
172 root 1.2 my $proxy = $arg{proxy} || $PROXY;
173 root 1.1 my $timeout = $arg{timeout} || $TIMEOUT;
174    
175     $hdr{"user-agent"} ||= $USERAGENT;
176    
177 root 1.2 my ($host, $port, $path, $scheme);
178    
179     if ($proxy) {
180     ($host, $port, $scheme) = @$proxy;
181     $path = $url;
182     } else {
183     ($scheme, my $authority, $path, my $query, my $fragment) =
184     $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
185 root 1.1
186 root 1.2 $port = $scheme eq "http" ? 80
187 root 1.1 : $scheme eq "https" ? 443
188 root 1.8 : return $cb->(undef, { Status => 599, Reason => "$url: only http and https URLs supported" });
189 root 1.1
190 root 1.2 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
191 root 1.8 or return $cb->(undef, { Status => 599, Reason => "$url: unparsable URL" });
192 root 1.2
193     $host = $1;
194     $port = $2 if defined $2;
195 root 1.1
196 root 1.2 $host =~ s/^\[(.*)\]$/$1/;
197     $path .= "?$query" if length $query;
198 root 1.1
199 root 1.2 $path = "/" unless $path;
200 root 1.1
201 root 1.2 $hdr{host} = $host = lc $host;
202     }
203    
204     $scheme = lc $scheme;
205 root 1.1
206     my %state;
207    
208 root 1.8 $hdr{"content-length"} = length $arg{body};
209 root 1.1
210     $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
211     $state{fh} = shift
212     or return $cb->(undef, { Status => 599, Reason => "$!" });
213    
214     delete $state{connect_guard}; # reduce memory usage, save a tree
215    
216     # get handle
217     $state{handle} = new AnyEvent::Handle
218     fh => $state{fh},
219     ($scheme eq "https" ? (tls => "connect") : ());
220    
221     # limit the number of persistent connections
222     if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
223     ++$KA_COUNT{$_[1]};
224     $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
225     $hdr{connection} = "keep-alive";
226 root 1.2 delete $hdr{connection}; # keep-alive not yet supported
227 root 1.1 } else {
228     delete $hdr{connection};
229     }
230    
231     # (re-)configure handle
232     $state{handle}->timeout ($timeout);
233     $state{handle}->on_error (sub {
234     %state = ();
235     $cb->(undef, { Status => 599, Reason => "$!" });
236     });
237     $state{handle}->on_eof (sub {
238     %state = ();
239     $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
240     });
241    
242     # send request
243     $state{handle}->push_write (
244 root 1.3 "$method $path HTTP/1.0\015\012"
245 root 1.1 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
246     . "\015\012"
247 root 1.8 . (delete $arg{body})
248 root 1.1 );
249    
250     %hdr = (); # reduce memory usage, save a kitten
251    
252     # status line
253     $state{handle}->push_read (line => qr/\015?\012/, sub {
254     $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
255     or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
256    
257     my %hdr = ( # response headers
258     HTTPVersion => ",$1",
259     Status => ",$2",
260     Reason => ",$3",
261     );
262    
263     # headers, could be optimized a bit
264     $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
265     for ("$_[1]\012") {
266 root 1.2 # we support spaces in field names, as lotus domino
267     # creates them.
268 root 1.1 $hdr{lc $1} .= ",$2"
269     while /\G
270 root 1.2 ([^:\000-\037]+):
271 root 1.1 [\011\040]*
272     ((?: [^\015\012]+ | \015?\012[\011\040] )*)
273     \015?\012
274     /gxc;
275    
276     /\G$/
277     or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
278     }
279    
280     substr $_, 0, 1, ""
281     for values %hdr;
282    
283 root 1.8 my $finish = sub {
284     if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
285     http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
286     } else {
287     $cb->($_[0], $_[1]);
288     }
289     };
290    
291     if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
292 root 1.4 %state = ();
293 root 1.8 $finish->(undef, \%hdr);
294 root 1.4 } else {
295 root 1.3 if (exists $hdr{"content-length"}) {
296     $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
297     # could cache persistent connection now
298     if ($hdr{connection} =~ /\bkeep-alive\b/i) {
299     # but we don't, due to misdesigns, this is annoyingly complex
300     };
301    
302     %state = ();
303 root 1.8 $finish->($_[1], \%hdr);
304 root 1.3 });
305     } else {
306     # too bad, need to read until we get an error or EOF,
307     # no way to detect winged data.
308     $_[0]->on_error (sub {
309     %state = ();
310 root 1.8 $finish->($_[0]{rbuf}, \%hdr);
311 root 1.3 });
312     $_[0]->on_eof (undef);
313     $_[0]->on_read (sub { });
314     }
315 root 1.1 }
316     });
317     });
318     }, sub {
319     $timeout
320     };
321    
322     defined wantarray && AnyEvent::Util::guard { %state = () }
323     }
324    
325     sub http_get($$;@) {
326     unshift @_, "GET";
327     &http_request
328     }
329    
330 root 1.4 sub http_head($$;@) {
331     unshift @_, "HEAD";
332     &http_request
333     }
334    
335 root 1.3 sub http_post($$$;@) {
336     unshift @_, "POST", "body";
337     &http_request
338     }
339    
340 root 1.9 =back
341    
342 root 1.2 =head2 GLOBAL FUNCTIONS AND VARIABLES
343 root 1.1
344     =over 4
345    
346 root 1.2 =item AnyEvent::HTTP::set_proxy "proxy-url"
347    
348     Sets the default proxy server to use. The proxy-url must begin with a
349     string of the form C<http://host:port> (optionally C<https:...>).
350    
351 root 1.3 =item $AnyEvent::HTTP::MAX_RECURSE
352 root 1.1
353 root 1.3 The default value for the C<recurse> request parameter (default: C<10>).
354 root 1.1
355     =item $AnyEvent::HTTP::USERAGENT
356    
357     The default value for the C<User-Agent> header (the default is
358     C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
359    
360     =item $AnyEvent::HTTP::MAX_PERSISTENT
361    
362     The maximum number of persistent connections to keep open (default: 8).
363    
364 root 1.3 Not implemented currently.
365    
366 root 1.1 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
367    
368 root 1.2 The maximum time to cache a persistent connection, in seconds (default: 2).
369 root 1.1
370 root 1.3 Not implemented currently.
371    
372 root 1.1 =back
373    
374     =cut
375    
376 root 1.2 sub set_proxy($) {
377     $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
378     }
379    
380     # initialise proxy from environment
381     set_proxy $ENV{http_proxy};
382    
383 root 1.1 =head1 SEE ALSO
384    
385     L<AnyEvent>.
386    
387     =head1 AUTHOR
388    
389     Marc Lehmann <schmorp@schmorp.de>
390     http://home.schmorp.de/
391    
392     =cut
393    
394     1
395