ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.7
Committed: Wed Jun 4 12:10:27 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Changes since 1.6: +2 -2 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4    
5     =head1 SYNOPSIS
6    
7     use AnyEvent::HTTP;
8    
9     =head1 DESCRIPTION
10    
11     This module is an L<AnyEvent> user, you need to make sure that you use and
12     run a supported event loop.
13    
14     =head2 METHODS
15    
16     =over 4
17    
18     =cut
19    
20     package AnyEvent::HTTP;
21    
22     use strict;
23     no warnings;
24    
25     use Carp;
26    
27     use AnyEvent ();
28     use AnyEvent::Util ();
29     use AnyEvent::Socket ();
30     use AnyEvent::Handle ();
31    
32     use base Exporter::;
33    
34     our $VERSION = '1.0';
35    
36     our @EXPORT = qw(http_get http_request);
37    
38     our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39 root 1.3 our $MAX_RECURSE = 10;
40 root 1.2 our $MAX_PERSISTENT = 8;
41     our $PERSISTENT_TIMEOUT = 2;
42     our $TIMEOUT = 300;
43 root 1.1
44     # changing these is evil
45     our $MAX_PERSISTENT_PER_HOST = 2;
46     our $MAX_PER_HOST = 4; # not respected yet :(
47    
48 root 1.2 our $PROXY;
49    
50 root 1.1 my %KA_COUNT; # number of open keep-alive connections per host
51    
52     =item http_get $url, key => value..., $cb->($data, $headers)
53    
54     Executes an HTTP-GET request. See the http_request function for details on
55     additional parameters.
56    
57 root 1.5 =item http_head $url, key => value..., $cb->($data, $headers)
58    
59     Executes an HTTP-HEAD request. See the http_request function for details on
60     additional parameters.
61    
62     =item http_post $url, $body, key => value..., $cb->($data, $headers)
63 root 1.3
64 root 1.7 Executes an HTTP-POST request with a request body of C<$bod>. See the
65 root 1.3 http_request function for details on additional parameters.
66    
67 root 1.1 =item http_request $method => $url, key => value..., $cb->($data, $headers)
68    
69     Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
70     must be an absolute http or https URL.
71    
72 root 1.2 The callback will be called with the response data as first argument
73     (or C<undef> if it wasn't available due to errors), and a hash-ref with
74     response headers as second argument.
75    
76 root 1.7 All the headers in that hash are lowercased. In addition to the response
77 root 1.2 headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78     C<Reason> contain the three parts of the HTTP Status-Line of the same
79     name.
80    
81     If an internal error occurs, such as not being able to resolve a hostname,
82     then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83     and the C<Reason> pseudo-header will contain an error message.
84    
85 root 1.6 A typical callback might look like this:
86    
87     sub {
88     my ($body, $hdr) = @_;
89    
90     if ($hdr->{Status} =~ /^2/) {
91     ... everything should be ok
92     } else {
93     print "error, $hdr->{Status} $hdr->{Reason}\n";
94     }
95     }
96    
97 root 1.1 Additional parameters are key-value pairs, and are fully optional. They
98     include:
99    
100     =over 4
101    
102 root 1.3 =item recurse => $count (default: $MAX_RECURSE)
103 root 1.1
104     Whether to recurse requests or not, e.g. on redirects, authentication
105 root 1.3 retries and so on, and how often to do so.
106 root 1.1
107     =item headers => hashref
108    
109     The request headers to use.
110    
111     =item timeout => $seconds
112    
113     The time-out to use for various stages - each connect attempt will reset
114 root 1.2 the timeout, as will read or write activity. Default timeout is 5 minutes.
115    
116     =item proxy => [$host, $port[, $scheme]] or undef
117    
118     Use the given http proxy for all requests. If not specified, then the
119     default proxy (as specified by C<$ENV{http_proxy}>) is used.
120    
121     C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
122     HTTPS.
123 root 1.1
124 root 1.3 =item body => $string
125    
126     The request body, usually empty. Will be-sent as-is (future versions of
127     this module might offer more options).
128    
129 root 1.1 =back
130    
131     =back
132    
133     =cut
134    
135     sub http_request($$$;@) {
136     my $cb = pop;
137     my ($method, $url, %arg) = @_;
138    
139     my %hdr;
140    
141 root 1.3 $method = uc $method;
142    
143 root 1.1 if (my $hdr = delete $arg{headers}) {
144     while (my ($k, $v) = each %$hdr) {
145     $hdr{lc $k} = $v;
146     }
147     }
148    
149 root 1.2 my $proxy = $arg{proxy} || $PROXY;
150 root 1.1 my $timeout = $arg{timeout} || $TIMEOUT;
151 root 1.3 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
152 root 1.1
153     $hdr{"user-agent"} ||= $USERAGENT;
154    
155 root 1.2 my ($host, $port, $path, $scheme);
156    
157     if ($proxy) {
158     ($host, $port, $scheme) = @$proxy;
159     $path = $url;
160     } else {
161     ($scheme, my $authority, $path, my $query, my $fragment) =
162     $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
163 root 1.1
164 root 1.2 $port = $scheme eq "http" ? 80
165 root 1.1 : $scheme eq "https" ? 443
166     : croak "$url: only http and https URLs supported";
167    
168 root 1.2 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
169     or croak "$authority: unparsable URL";
170    
171     $host = $1;
172     $port = $2 if defined $2;
173 root 1.1
174 root 1.2 $host =~ s/^\[(.*)\]$/$1/;
175     $path .= "?$query" if length $query;
176 root 1.1
177 root 1.2 $path = "/" unless $path;
178 root 1.1
179 root 1.2 $hdr{host} = $host = lc $host;
180     }
181    
182     $scheme = lc $scheme;
183 root 1.1
184     my %state;
185    
186 root 1.3 $state{body} = delete $arg{body};
187 root 1.1
188 root 1.3 $hdr{"content-length"} = length $state{body};
189 root 1.1
190     $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
191     $state{fh} = shift
192     or return $cb->(undef, { Status => 599, Reason => "$!" });
193    
194     delete $state{connect_guard}; # reduce memory usage, save a tree
195    
196     # get handle
197     $state{handle} = new AnyEvent::Handle
198     fh => $state{fh},
199     ($scheme eq "https" ? (tls => "connect") : ());
200    
201     # limit the number of persistent connections
202     if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
203     ++$KA_COUNT{$_[1]};
204     $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
205     $hdr{connection} = "keep-alive";
206 root 1.2 delete $hdr{connection}; # keep-alive not yet supported
207 root 1.1 } else {
208     delete $hdr{connection};
209     }
210    
211     # (re-)configure handle
212     $state{handle}->timeout ($timeout);
213     $state{handle}->on_error (sub {
214     %state = ();
215     $cb->(undef, { Status => 599, Reason => "$!" });
216     });
217     $state{handle}->on_eof (sub {
218     %state = ();
219     $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
220     });
221    
222     # send request
223     $state{handle}->push_write (
224 root 1.3 "$method $path HTTP/1.0\015\012"
225 root 1.1 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
226     . "\015\012"
227     . (delete $state{body})
228     );
229    
230     %hdr = (); # reduce memory usage, save a kitten
231    
232     # status line
233     $state{handle}->push_read (line => qr/\015?\012/, sub {
234     $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
235     or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
236    
237     my %hdr = ( # response headers
238     HTTPVersion => ",$1",
239     Status => ",$2",
240     Reason => ",$3",
241     );
242    
243     # headers, could be optimized a bit
244     $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
245     for ("$_[1]\012") {
246 root 1.2 # we support spaces in field names, as lotus domino
247     # creates them.
248 root 1.1 $hdr{lc $1} .= ",$2"
249     while /\G
250 root 1.2 ([^:\000-\037]+):
251 root 1.1 [\011\040]*
252     ((?: [^\015\012]+ | \015?\012[\011\040] )*)
253     \015?\012
254     /gxc;
255    
256     /\G$/
257     or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
258     }
259    
260     substr $_, 0, 1, ""
261     for values %hdr;
262    
263 root 1.4 if ($method eq "HEAD") {
264     %state = ();
265     $cb->(undef, \%hdr);
266     } else {
267 root 1.3 if (exists $hdr{"content-length"}) {
268     $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
269     # could cache persistent connection now
270     if ($hdr{connection} =~ /\bkeep-alive\b/i) {
271     # but we don't, due to misdesigns, this is annoyingly complex
272     };
273    
274     %state = ();
275     $cb->($_[1], \%hdr);
276     });
277     } else {
278     # too bad, need to read until we get an error or EOF,
279     # no way to detect winged data.
280     $_[0]->on_error (sub {
281     %state = ();
282     $cb->($_[0]{rbuf}, \%hdr);
283     });
284     $_[0]->on_eof (undef);
285     $_[0]->on_read (sub { });
286     }
287 root 1.1 }
288     });
289     });
290     }, sub {
291     $timeout
292     };
293    
294     defined wantarray && AnyEvent::Util::guard { %state = () }
295     }
296    
297     sub http_get($$;@) {
298     unshift @_, "GET";
299     &http_request
300     }
301    
302 root 1.4 sub http_head($$;@) {
303     unshift @_, "HEAD";
304     &http_request
305     }
306    
307 root 1.3 sub http_post($$$;@) {
308     unshift @_, "POST", "body";
309     &http_request
310     }
311    
312 root 1.2 =head2 GLOBAL FUNCTIONS AND VARIABLES
313 root 1.1
314     =over 4
315    
316 root 1.2 =item AnyEvent::HTTP::set_proxy "proxy-url"
317    
318     Sets the default proxy server to use. The proxy-url must begin with a
319     string of the form C<http://host:port> (optionally C<https:...>).
320    
321 root 1.3 =item $AnyEvent::HTTP::MAX_RECURSE
322 root 1.1
323 root 1.3 The default value for the C<recurse> request parameter (default: C<10>).
324 root 1.1
325     =item $AnyEvent::HTTP::USERAGENT
326    
327     The default value for the C<User-Agent> header (the default is
328     C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
329    
330     =item $AnyEvent::HTTP::MAX_PERSISTENT
331    
332     The maximum number of persistent connections to keep open (default: 8).
333    
334 root 1.3 Not implemented currently.
335    
336 root 1.1 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
337    
338 root 1.2 The maximum time to cache a persistent connection, in seconds (default: 2).
339 root 1.1
340 root 1.3 Not implemented currently.
341    
342 root 1.1 =back
343    
344     =cut
345    
346 root 1.2 sub set_proxy($) {
347     $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
348     }
349    
350     # initialise proxy from environment
351     set_proxy $ENV{http_proxy};
352    
353 root 1.1 =head1 SEE ALSO
354    
355     L<AnyEvent>.
356    
357     =head1 AUTHOR
358    
359     Marc Lehmann <schmorp@schmorp.de>
360     http://home.schmorp.de/
361    
362     =cut
363    
364     1
365