ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.5
Committed: Wed Jun 4 12:03:47 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Changes since 1.4: +6 -1 lines
Log Message:
*** empty log message ***

File Contents

# User Rev Content
1 root 1.1 =head1 NAME
2    
3     AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4    
5     =head1 SYNOPSIS
6    
7     use AnyEvent::HTTP;
8    
9     =head1 DESCRIPTION
10    
11     This module is an L<AnyEvent> user, you need to make sure that you use and
12     run a supported event loop.
13    
14     =head2 METHODS
15    
16     =over 4
17    
18     =cut
19    
20     package AnyEvent::HTTP;
21    
22     use strict;
23     no warnings;
24    
25     use Carp;
26    
27     use AnyEvent ();
28     use AnyEvent::Util ();
29     use AnyEvent::Socket ();
30     use AnyEvent::Handle ();
31    
32     use base Exporter::;
33    
34     our $VERSION = '1.0';
35    
36     our @EXPORT = qw(http_get http_request);
37    
38     our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39 root 1.3 our $MAX_RECURSE = 10;
40 root 1.2 our $MAX_PERSISTENT = 8;
41     our $PERSISTENT_TIMEOUT = 2;
42     our $TIMEOUT = 300;
43 root 1.1
44     # changing these is evil
45     our $MAX_PERSISTENT_PER_HOST = 2;
46     our $MAX_PER_HOST = 4; # not respected yet :(
47    
48 root 1.2 our $PROXY;
49    
50 root 1.1 my %KA_COUNT; # number of open keep-alive connections per host
51    
52     =item http_get $url, key => value..., $cb->($data, $headers)
53    
54     Executes an HTTP-GET request. See the http_request function for details on
55     additional parameters.
56    
57 root 1.5 =item http_head $url, key => value..., $cb->($data, $headers)
58    
59     Executes an HTTP-HEAD request. See the http_request function for details on
60     additional parameters.
61    
62     =item http_post $url, $body, key => value..., $cb->($data, $headers)
63 root 1.3
64     Executes an HTTP-POST request with a requets body of C<$bod>. See the
65     http_request function for details on additional parameters.
66    
67 root 1.1 =item http_request $method => $url, key => value..., $cb->($data, $headers)
68    
69     Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
70     must be an absolute http or https URL.
71    
72 root 1.2 The callback will be called with the response data as first argument
73     (or C<undef> if it wasn't available due to errors), and a hash-ref with
74     response headers as second argument.
75    
76     All the headers in that has are lowercased. In addition to the response
77     headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78     C<Reason> contain the three parts of the HTTP Status-Line of the same
79     name.
80    
81     If an internal error occurs, such as not being able to resolve a hostname,
82     then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83     and the C<Reason> pseudo-header will contain an error message.
84    
85 root 1.1 Additional parameters are key-value pairs, and are fully optional. They
86     include:
87    
88     =over 4
89    
90 root 1.3 =item recurse => $count (default: $MAX_RECURSE)
91 root 1.1
92     Whether to recurse requests or not, e.g. on redirects, authentication
93 root 1.3 retries and so on, and how often to do so.
94 root 1.1
95     =item headers => hashref
96    
97     The request headers to use.
98    
99     =item timeout => $seconds
100    
101     The time-out to use for various stages - each connect attempt will reset
102 root 1.2 the timeout, as will read or write activity. Default timeout is 5 minutes.
103    
104     =item proxy => [$host, $port[, $scheme]] or undef
105    
106     Use the given http proxy for all requests. If not specified, then the
107     default proxy (as specified by C<$ENV{http_proxy}>) is used.
108    
109     C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
110     HTTPS.
111 root 1.1
112 root 1.3 =item body => $string
113    
114     The request body, usually empty. Will be-sent as-is (future versions of
115     this module might offer more options).
116    
117 root 1.1 =back
118    
119     =back
120    
121     =cut
122    
123     sub http_request($$$;@) {
124     my $cb = pop;
125     my ($method, $url, %arg) = @_;
126    
127     my %hdr;
128    
129 root 1.3 $method = uc $method;
130    
131 root 1.1 if (my $hdr = delete $arg{headers}) {
132     while (my ($k, $v) = each %$hdr) {
133     $hdr{lc $k} = $v;
134     }
135     }
136    
137 root 1.2 my $proxy = $arg{proxy} || $PROXY;
138 root 1.1 my $timeout = $arg{timeout} || $TIMEOUT;
139 root 1.3 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
140 root 1.1
141     $hdr{"user-agent"} ||= $USERAGENT;
142    
143 root 1.2 my ($host, $port, $path, $scheme);
144    
145     if ($proxy) {
146     ($host, $port, $scheme) = @$proxy;
147     $path = $url;
148     } else {
149     ($scheme, my $authority, $path, my $query, my $fragment) =
150     $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
151 root 1.1
152 root 1.2 $port = $scheme eq "http" ? 80
153 root 1.1 : $scheme eq "https" ? 443
154     : croak "$url: only http and https URLs supported";
155    
156 root 1.2 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
157     or croak "$authority: unparsable URL";
158    
159     $host = $1;
160     $port = $2 if defined $2;
161 root 1.1
162 root 1.2 $host =~ s/^\[(.*)\]$/$1/;
163     $path .= "?$query" if length $query;
164 root 1.1
165 root 1.2 $path = "/" unless $path;
166 root 1.1
167 root 1.2 $hdr{host} = $host = lc $host;
168     }
169    
170     $scheme = lc $scheme;
171 root 1.1
172     my %state;
173    
174 root 1.3 $state{body} = delete $arg{body};
175 root 1.1
176 root 1.3 $hdr{"content-length"} = length $state{body};
177 root 1.1
178     $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
179     $state{fh} = shift
180     or return $cb->(undef, { Status => 599, Reason => "$!" });
181    
182     delete $state{connect_guard}; # reduce memory usage, save a tree
183    
184     # get handle
185     $state{handle} = new AnyEvent::Handle
186     fh => $state{fh},
187     ($scheme eq "https" ? (tls => "connect") : ());
188    
189     # limit the number of persistent connections
190     if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
191     ++$KA_COUNT{$_[1]};
192     $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
193     $hdr{connection} = "keep-alive";
194 root 1.2 delete $hdr{connection}; # keep-alive not yet supported
195 root 1.1 } else {
196     delete $hdr{connection};
197     }
198    
199     # (re-)configure handle
200     $state{handle}->timeout ($timeout);
201     $state{handle}->on_error (sub {
202     %state = ();
203     $cb->(undef, { Status => 599, Reason => "$!" });
204     });
205     $state{handle}->on_eof (sub {
206     %state = ();
207     $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
208     });
209    
210     # send request
211     $state{handle}->push_write (
212 root 1.3 "$method $path HTTP/1.0\015\012"
213 root 1.1 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
214     . "\015\012"
215     . (delete $state{body})
216     );
217    
218     %hdr = (); # reduce memory usage, save a kitten
219    
220     # status line
221     $state{handle}->push_read (line => qr/\015?\012/, sub {
222     $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
223     or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
224    
225     my %hdr = ( # response headers
226     HTTPVersion => ",$1",
227     Status => ",$2",
228     Reason => ",$3",
229     );
230    
231     # headers, could be optimized a bit
232     $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
233     for ("$_[1]\012") {
234 root 1.2 # we support spaces in field names, as lotus domino
235     # creates them.
236 root 1.1 $hdr{lc $1} .= ",$2"
237     while /\G
238 root 1.2 ([^:\000-\037]+):
239 root 1.1 [\011\040]*
240     ((?: [^\015\012]+ | \015?\012[\011\040] )*)
241     \015?\012
242     /gxc;
243    
244     /\G$/
245     or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
246     }
247    
248     substr $_, 0, 1, ""
249     for values %hdr;
250    
251 root 1.4 if ($method eq "HEAD") {
252     %state = ();
253     $cb->(undef, \%hdr);
254     } else {
255 root 1.3 if (exists $hdr{"content-length"}) {
256     $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
257     # could cache persistent connection now
258     if ($hdr{connection} =~ /\bkeep-alive\b/i) {
259     # but we don't, due to misdesigns, this is annoyingly complex
260     };
261    
262     %state = ();
263     $cb->($_[1], \%hdr);
264     });
265     } else {
266     # too bad, need to read until we get an error or EOF,
267     # no way to detect winged data.
268     $_[0]->on_error (sub {
269     %state = ();
270     $cb->($_[0]{rbuf}, \%hdr);
271     });
272     $_[0]->on_eof (undef);
273     $_[0]->on_read (sub { });
274     }
275 root 1.1 }
276     });
277     });
278     }, sub {
279     $timeout
280     };
281    
282     defined wantarray && AnyEvent::Util::guard { %state = () }
283     }
284    
285     sub http_get($$;@) {
286     unshift @_, "GET";
287     &http_request
288     }
289    
290 root 1.4 sub http_head($$;@) {
291     unshift @_, "HEAD";
292     &http_request
293     }
294    
295 root 1.3 sub http_post($$$;@) {
296     unshift @_, "POST", "body";
297     &http_request
298     }
299    
300 root 1.2 =head2 GLOBAL FUNCTIONS AND VARIABLES
301 root 1.1
302     =over 4
303    
304 root 1.2 =item AnyEvent::HTTP::set_proxy "proxy-url"
305    
306     Sets the default proxy server to use. The proxy-url must begin with a
307     string of the form C<http://host:port> (optionally C<https:...>).
308    
309 root 1.3 =item $AnyEvent::HTTP::MAX_RECURSE
310 root 1.1
311 root 1.3 The default value for the C<recurse> request parameter (default: C<10>).
312 root 1.1
313     =item $AnyEvent::HTTP::USERAGENT
314    
315     The default value for the C<User-Agent> header (the default is
316     C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
317    
318     =item $AnyEvent::HTTP::MAX_PERSISTENT
319    
320     The maximum number of persistent connections to keep open (default: 8).
321    
322 root 1.3 Not implemented currently.
323    
324 root 1.1 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
325    
326 root 1.2 The maximum time to cache a persistent connection, in seconds (default: 2).
327 root 1.1
328 root 1.3 Not implemented currently.
329    
330 root 1.1 =back
331    
332     =cut
333    
334 root 1.2 sub set_proxy($) {
335     $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
336     }
337    
338     # initialise proxy from environment
339     set_proxy $ENV{http_proxy};
340    
341 root 1.1 =head1 SEE ALSO
342    
343     L<AnyEvent>.
344    
345     =head1 AUTHOR
346    
347     Marc Lehmann <schmorp@schmorp.de>
348     http://home.schmorp.de/
349    
350     =cut
351    
352     1
353