ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.8
Committed: Wed Jun 4 12:32:30 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Changes since 1.7: +22 -12 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4    
5     =head1 SYNOPSIS
6    
7     use AnyEvent::HTTP;
8    
9     =head1 DESCRIPTION
10    
11     This module is an L<AnyEvent> user, you need to make sure that you use and
12     run a supported event loop.
13    
14     =head2 METHODS
15    
16     =over 4
17    
18     =cut
19    
20     package AnyEvent::HTTP;
21    
22     use strict;
23     no warnings;
24    
25     use Carp;
26    
27     use AnyEvent ();
28     use AnyEvent::Util ();
29     use AnyEvent::Socket ();
30     use AnyEvent::Handle ();
31    
32     use base Exporter::;
33    
34     our $VERSION = '1.0';
35    
36     our @EXPORT = qw(http_get http_request);
37    
38     our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39 root 1.3 our $MAX_RECURSE = 10;
40 root 1.2 our $MAX_PERSISTENT = 8;
41     our $PERSISTENT_TIMEOUT = 2;
42     our $TIMEOUT = 300;
43 root 1.1
44     # changing these is evil
45     our $MAX_PERSISTENT_PER_HOST = 2;
46     our $MAX_PER_HOST = 4; # not respected yet :(
47    
48 root 1.2 our $PROXY;
49    
50 root 1.1 my %KA_COUNT; # number of open keep-alive connections per host
51    
52     =item http_get $url, key => value..., $cb->($data, $headers)
53    
54     Executes an HTTP-GET request. See the http_request function for details on
55     additional parameters.
56    
57 root 1.5 =item http_head $url, key => value..., $cb->($data, $headers)
58    
59     Executes an HTTP-HEAD request. See the http_request function for details on
60     additional parameters.
61    
62     =item http_post $url, $body, key => value..., $cb->($data, $headers)
63 root 1.3
64 root 1.7 Executes an HTTP-POST request with a request body of C<$bod>. See the
65 root 1.3 http_request function for details on additional parameters.
66    
67 root 1.1 =item http_request $method => $url, key => value..., $cb->($data, $headers)
68    
69     Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
70     must be an absolute http or https URL.
71    
72 root 1.2 The callback will be called with the response data as first argument
73     (or C<undef> if it wasn't available due to errors), and a hash-ref with
74     response headers as second argument.
75    
76 root 1.7 All the headers in that hash are lowercased. In addition to the response
77 root 1.2 headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78     C<Reason> contain the three parts of the HTTP Status-Line of the same
79     name.
80    
81     If an internal error occurs, such as not being able to resolve a hostname,
82     then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83     and the C<Reason> pseudo-header will contain an error message.
84    
85 root 1.6 A typical callback might look like this:
86    
87     sub {
88     my ($body, $hdr) = @_;
89    
90     if ($hdr->{Status} =~ /^2/) {
91     ... everything should be ok
92     } else {
93     print "error, $hdr->{Status} $hdr->{Reason}\n";
94     }
95     }
96    
97 root 1.1 Additional parameters are key-value pairs, and are fully optional. They
98     include:
99    
100     =over 4
101    
102 root 1.3 =item recurse => $count (default: $MAX_RECURSE)
103 root 1.1
104     Whether to recurse requests or not, e.g. on redirects, authentication
105 root 1.3 retries and so on, and how often to do so.
106 root 1.1
107     =item headers => hashref
108    
109     The request headers to use.
110    
111     =item timeout => $seconds
112    
113     The time-out to use for various stages - each connect attempt will reset
114 root 1.2 the timeout, as will read or write activity. Default timeout is 5 minutes.
115    
116     =item proxy => [$host, $port[, $scheme]] or undef
117    
118     Use the given http proxy for all requests. If not specified, then the
119     default proxy (as specified by C<$ENV{http_proxy}>) is used.
120    
121     C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
122     HTTPS.
123 root 1.1
124 root 1.3 =item body => $string
125    
126     The request body, usually empty. Will be-sent as-is (future versions of
127     this module might offer more options).
128    
129 root 1.1 =back
130    
131     =back
132    
133     =cut
134    
135     sub http_request($$$;@) {
136     my $cb = pop;
137     my ($method, $url, %arg) = @_;
138    
139     my %hdr;
140    
141 root 1.3 $method = uc $method;
142    
143 root 1.8 if (my $hdr = $arg{headers}) {
144 root 1.1 while (my ($k, $v) = each %$hdr) {
145     $hdr{lc $k} = $v;
146     }
147     }
148    
149 root 1.8 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
150    
151     return $cb->(undef, { Status => 599, Reason => "recursion limit reached" })
152     if $recurse < 0;
153    
154 root 1.2 my $proxy = $arg{proxy} || $PROXY;
155 root 1.1 my $timeout = $arg{timeout} || $TIMEOUT;
156    
157     $hdr{"user-agent"} ||= $USERAGENT;
158    
159 root 1.2 my ($host, $port, $path, $scheme);
160    
161     if ($proxy) {
162     ($host, $port, $scheme) = @$proxy;
163     $path = $url;
164     } else {
165     ($scheme, my $authority, $path, my $query, my $fragment) =
166     $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
167 root 1.1
168 root 1.2 $port = $scheme eq "http" ? 80
169 root 1.1 : $scheme eq "https" ? 443
170 root 1.8 : return $cb->(undef, { Status => 599, Reason => "$url: only http and https URLs supported" });
171 root 1.1
172 root 1.2 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
173 root 1.8 or return $cb->(undef, { Status => 599, Reason => "$url: unparsable URL" });
174 root 1.2
175     $host = $1;
176     $port = $2 if defined $2;
177 root 1.1
178 root 1.2 $host =~ s/^\[(.*)\]$/$1/;
179     $path .= "?$query" if length $query;
180 root 1.1
181 root 1.2 $path = "/" unless $path;
182 root 1.1
183 root 1.2 $hdr{host} = $host = lc $host;
184     }
185    
186     $scheme = lc $scheme;
187 root 1.1
188     my %state;
189    
190 root 1.8 $hdr{"content-length"} = length $arg{body};
191 root 1.1
192     $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
193     $state{fh} = shift
194     or return $cb->(undef, { Status => 599, Reason => "$!" });
195    
196     delete $state{connect_guard}; # reduce memory usage, save a tree
197    
198     # get handle
199     $state{handle} = new AnyEvent::Handle
200     fh => $state{fh},
201     ($scheme eq "https" ? (tls => "connect") : ());
202    
203     # limit the number of persistent connections
204     if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
205     ++$KA_COUNT{$_[1]};
206     $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
207     $hdr{connection} = "keep-alive";
208 root 1.2 delete $hdr{connection}; # keep-alive not yet supported
209 root 1.1 } else {
210     delete $hdr{connection};
211     }
212    
213     # (re-)configure handle
214     $state{handle}->timeout ($timeout);
215     $state{handle}->on_error (sub {
216     %state = ();
217     $cb->(undef, { Status => 599, Reason => "$!" });
218     });
219     $state{handle}->on_eof (sub {
220     %state = ();
221     $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
222     });
223    
224     # send request
225     $state{handle}->push_write (
226 root 1.3 "$method $path HTTP/1.0\015\012"
227 root 1.1 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
228     . "\015\012"
229 root 1.8 . (delete $arg{body})
230 root 1.1 );
231    
232     %hdr = (); # reduce memory usage, save a kitten
233    
234     # status line
235     $state{handle}->push_read (line => qr/\015?\012/, sub {
236     $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
237     or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
238    
239     my %hdr = ( # response headers
240     HTTPVersion => ",$1",
241     Status => ",$2",
242     Reason => ",$3",
243     );
244    
245     # headers, could be optimized a bit
246     $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
247     for ("$_[1]\012") {
248 root 1.2 # we support spaces in field names, as lotus domino
249     # creates them.
250 root 1.1 $hdr{lc $1} .= ",$2"
251     while /\G
252 root 1.2 ([^:\000-\037]+):
253 root 1.1 [\011\040]*
254     ((?: [^\015\012]+ | \015?\012[\011\040] )*)
255     \015?\012
256     /gxc;
257    
258     /\G$/
259     or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
260     }
261    
262     substr $_, 0, 1, ""
263     for values %hdr;
264    
265 root 1.8 my $finish = sub {
266     if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
267     http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
268     } else {
269     $cb->($_[0], $_[1]);
270     }
271     };
272    
273     if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
274 root 1.4 %state = ();
275 root 1.8 $finish->(undef, \%hdr);
276 root 1.4 } else {
277 root 1.3 if (exists $hdr{"content-length"}) {
278     $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
279     # could cache persistent connection now
280     if ($hdr{connection} =~ /\bkeep-alive\b/i) {
281     # but we don't, due to misdesigns, this is annoyingly complex
282     };
283    
284     %state = ();
285 root 1.8 $finish->($_[1], \%hdr);
286 root 1.3 });
287     } else {
288     # too bad, need to read until we get an error or EOF,
289     # no way to detect winged data.
290     $_[0]->on_error (sub {
291     %state = ();
292 root 1.8 $finish->($_[0]{rbuf}, \%hdr);
293 root 1.3 });
294     $_[0]->on_eof (undef);
295     $_[0]->on_read (sub { });
296     }
297 root 1.1 }
298     });
299     });
300     }, sub {
301     $timeout
302     };
303    
304     defined wantarray && AnyEvent::Util::guard { %state = () }
305     }
306    
307     sub http_get($$;@) {
308     unshift @_, "GET";
309     &http_request
310     }
311    
312 root 1.4 sub http_head($$;@) {
313     unshift @_, "HEAD";
314     &http_request
315     }
316    
317 root 1.3 sub http_post($$$;@) {
318     unshift @_, "POST", "body";
319     &http_request
320     }
321    
322 root 1.2 =head2 GLOBAL FUNCTIONS AND VARIABLES
323 root 1.1
324     =over 4
325    
326 root 1.2 =item AnyEvent::HTTP::set_proxy "proxy-url"
327    
328     Sets the default proxy server to use. The proxy-url must begin with a
329     string of the form C<http://host:port> (optionally C<https:...>).
330    
331 root 1.3 =item $AnyEvent::HTTP::MAX_RECURSE
332 root 1.1
333 root 1.3 The default value for the C<recurse> request parameter (default: C<10>).
334 root 1.1
335     =item $AnyEvent::HTTP::USERAGENT
336    
337     The default value for the C<User-Agent> header (the default is
338     C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
339    
340     =item $AnyEvent::HTTP::MAX_PERSISTENT
341    
342     The maximum number of persistent connections to keep open (default: 8).
343    
344 root 1.3 Not implemented currently.
345    
346 root 1.1 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
347    
348 root 1.2 The maximum time to cache a persistent connection, in seconds (default: 2).
349 root 1.1
350 root 1.3 Not implemented currently.
351    
352 root 1.1 =back
353    
354     =cut
355    
356 root 1.2 sub set_proxy($) {
357     $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
358     }
359    
360     # initialise proxy from environment
361     set_proxy $ENV{http_proxy};
362    
363 root 1.1 =head1 SEE ALSO
364    
365     L<AnyEvent>.
366    
367     =head1 AUTHOR
368    
369     Marc Lehmann <schmorp@schmorp.de>
370     http://home.schmorp.de/
371    
372     =cut
373    
374     1
375