ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
Revision: 1.9
Committed: Wed Jun 4 13:51:53 2008 UTC (15 years, 11 months ago) by root
Branch: MAIN
Changes since 1.8: +21 -1 lines
Log Message:
*** empty log message ***

File Contents

# Content
1 =head1 NAME
2
3 AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4
5 =head1 SYNOPSIS
6
7 use AnyEvent::HTTP;
8
9 =head1 DESCRIPTION
10
11 This module is an L<AnyEvent> user, you need to make sure that you use and
12 run a supported event loop.
13
14 =head2 METHODS
15
16 =over 4
17
18 =cut
19
20 package AnyEvent::HTTP;
21
22 use strict;
23 no warnings;
24
25 use Carp;
26
27 use AnyEvent ();
28 use AnyEvent::Util ();
29 use AnyEvent::Socket ();
30 use AnyEvent::Handle ();
31
32 use base Exporter::;
33
34 our $VERSION = '1.0';
35
36 our @EXPORT = qw(http_get http_request);
37
38 our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39 our $MAX_RECURSE = 10;
40 our $MAX_PERSISTENT = 8;
41 our $PERSISTENT_TIMEOUT = 2;
42 our $TIMEOUT = 300;
43
44 # changing these is evil
45 our $MAX_PERSISTENT_PER_HOST = 2;
46 our $MAX_PER_HOST = 4; # not respected yet :(
47
48 our $PROXY;
49
50 my %KA_COUNT; # number of open keep-alive connections per host
51
52 =item http_get $url, key => value..., $cb->($data, $headers)
53
54 Executes an HTTP-GET request. See the http_request function for details on
55 additional parameters.
56
57 =item http_head $url, key => value..., $cb->($data, $headers)
58
59 Executes an HTTP-HEAD request. See the http_request function for details on
60 additional parameters.
61
62 =item http_post $url, $body, key => value..., $cb->($data, $headers)
63
64 Executes an HTTP-POST request with a request body of C<$bod>. See the
65 http_request function for details on additional parameters.
66
67 =item http_request $method => $url, key => value..., $cb->($data, $headers)
68
69 Executes a HTTP request of type C<$method> (e.g. C<GET>, C<POST>). The URL
70 must be an absolute http or https URL.
71
72 The callback will be called with the response data as first argument
73 (or C<undef> if it wasn't available due to errors), and a hash-ref with
74 response headers as second argument.
75
76 All the headers in that hash are lowercased. In addition to the response
77 headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and
78 C<Reason> contain the three parts of the HTTP Status-Line of the same
79 name.
80
81 If an internal error occurs, such as not being able to resolve a hostname,
82 then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
83 and the C<Reason> pseudo-header will contain an error message.
84
85 A typical callback might look like this:
86
87 sub {
88 my ($body, $hdr) = @_;
89
90 if ($hdr->{Status} =~ /^2/) {
91 ... everything should be ok
92 } else {
93 print "error, $hdr->{Status} $hdr->{Reason}\n";
94 }
95 }
96
97 Additional parameters are key-value pairs, and are fully optional. They
98 include:
99
100 =over 4
101
102 =item recurse => $count (default: $MAX_RECURSE)
103
104 Whether to recurse requests or not, e.g. on redirects, authentication
105 retries and so on, and how often to do so.
106
107 =item headers => hashref
108
109 The request headers to use.
110
111 =item timeout => $seconds
112
113 The time-out to use for various stages - each connect attempt will reset
114 the timeout, as will read or write activity. Default timeout is 5 minutes.
115
116 =item proxy => [$host, $port[, $scheme]] or undef
117
118 Use the given http proxy for all requests. If not specified, then the
119 default proxy (as specified by C<$ENV{http_proxy}>) is used.
120
121 C<$scheme> must be either missing or C<http> for HTTP, or C<https> for
122 HTTPS.
123
124 =item body => $string
125
126 The request body, usually empty. Will be-sent as-is (future versions of
127 this module might offer more options).
128
129 =back
130
131 Example: make a simple HTTP GET request for http://www.nethype.de/
132
133 http_request GET => "http://www.nethype.de/", sub {
134 my ($body, $hdr) = @_;
135 print "$body\n";
136 };
137
138 Example: make a HTTP HEAD request on https://www.google.com/, use a
139 timeout of 30 seconds.
140
141 http_request
142 GET => "https://www.google.com",
143 timeout => 30,
144 sub {
145 my ($body, $hdr) = @_;
146 use Data::Dumper;
147 print Dumper $hdr;
148 }
149 ;
150
151 =cut
152
153 sub http_request($$$;@) {
154 my $cb = pop;
155 my ($method, $url, %arg) = @_;
156
157 my %hdr;
158
159 $method = uc $method;
160
161 if (my $hdr = $arg{headers}) {
162 while (my ($k, $v) = each %$hdr) {
163 $hdr{lc $k} = $v;
164 }
165 }
166
167 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
168
169 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" })
170 if $recurse < 0;
171
172 my $proxy = $arg{proxy} || $PROXY;
173 my $timeout = $arg{timeout} || $TIMEOUT;
174
175 $hdr{"user-agent"} ||= $USERAGENT;
176
177 my ($host, $port, $path, $scheme);
178
179 if ($proxy) {
180 ($host, $port, $scheme) = @$proxy;
181 $path = $url;
182 } else {
183 ($scheme, my $authority, $path, my $query, my $fragment) =
184 $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
185
186 $port = $scheme eq "http" ? 80
187 : $scheme eq "https" ? 443
188 : return $cb->(undef, { Status => 599, Reason => "$url: only http and https URLs supported" });
189
190 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
191 or return $cb->(undef, { Status => 599, Reason => "$url: unparsable URL" });
192
193 $host = $1;
194 $port = $2 if defined $2;
195
196 $host =~ s/^\[(.*)\]$/$1/;
197 $path .= "?$query" if length $query;
198
199 $path = "/" unless $path;
200
201 $hdr{host} = $host = lc $host;
202 }
203
204 $scheme = lc $scheme;
205
206 my %state;
207
208 $hdr{"content-length"} = length $arg{body};
209
210 $state{connect_guard} = AnyEvent::Socket::tcp_connect $host, $port, sub {
211 $state{fh} = shift
212 or return $cb->(undef, { Status => 599, Reason => "$!" });
213
214 delete $state{connect_guard}; # reduce memory usage, save a tree
215
216 # get handle
217 $state{handle} = new AnyEvent::Handle
218 fh => $state{fh},
219 ($scheme eq "https" ? (tls => "connect") : ());
220
221 # limit the number of persistent connections
222 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
223 ++$KA_COUNT{$_[1]};
224 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
225 $hdr{connection} = "keep-alive";
226 delete $hdr{connection}; # keep-alive not yet supported
227 } else {
228 delete $hdr{connection};
229 }
230
231 # (re-)configure handle
232 $state{handle}->timeout ($timeout);
233 $state{handle}->on_error (sub {
234 %state = ();
235 $cb->(undef, { Status => 599, Reason => "$!" });
236 });
237 $state{handle}->on_eof (sub {
238 %state = ();
239 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
240 });
241
242 # send request
243 $state{handle}->push_write (
244 "$method $path HTTP/1.0\015\012"
245 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
246 . "\015\012"
247 . (delete $arg{body})
248 );
249
250 %hdr = (); # reduce memory usage, save a kitten
251
252 # status line
253 $state{handle}->push_read (line => qr/\015?\012/, sub {
254 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
255 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
256
257 my %hdr = ( # response headers
258 HTTPVersion => ",$1",
259 Status => ",$2",
260 Reason => ",$3",
261 );
262
263 # headers, could be optimized a bit
264 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
265 for ("$_[1]\012") {
266 # we support spaces in field names, as lotus domino
267 # creates them.
268 $hdr{lc $1} .= ",$2"
269 while /\G
270 ([^:\000-\037]+):
271 [\011\040]*
272 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
273 \015?\012
274 /gxc;
275
276 /\G$/
277 or return $cb->(undef, { Status => 599, Reason => "garbled response headers" });
278 }
279
280 substr $_, 0, 1, ""
281 for values %hdr;
282
283 my $finish = sub {
284 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
285 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
286 } else {
287 $cb->($_[0], $_[1]);
288 }
289 };
290
291 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
292 %state = ();
293 $finish->(undef, \%hdr);
294 } else {
295 if (exists $hdr{"content-length"}) {
296 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
297 # could cache persistent connection now
298 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
299 # but we don't, due to misdesigns, this is annoyingly complex
300 };
301
302 %state = ();
303 $finish->($_[1], \%hdr);
304 });
305 } else {
306 # too bad, need to read until we get an error or EOF,
307 # no way to detect winged data.
308 $_[0]->on_error (sub {
309 %state = ();
310 $finish->($_[0]{rbuf}, \%hdr);
311 });
312 $_[0]->on_eof (undef);
313 $_[0]->on_read (sub { });
314 }
315 }
316 });
317 });
318 }, sub {
319 $timeout
320 };
321
322 defined wantarray && AnyEvent::Util::guard { %state = () }
323 }
324
325 sub http_get($$;@) {
326 unshift @_, "GET";
327 &http_request
328 }
329
330 sub http_head($$;@) {
331 unshift @_, "HEAD";
332 &http_request
333 }
334
335 sub http_post($$$;@) {
336 unshift @_, "POST", "body";
337 &http_request
338 }
339
340 =back
341
342 =head2 GLOBAL FUNCTIONS AND VARIABLES
343
344 =over 4
345
346 =item AnyEvent::HTTP::set_proxy "proxy-url"
347
348 Sets the default proxy server to use. The proxy-url must begin with a
349 string of the form C<http://host:port> (optionally C<https:...>).
350
351 =item $AnyEvent::HTTP::MAX_RECURSE
352
353 The default value for the C<recurse> request parameter (default: C<10>).
354
355 =item $AnyEvent::HTTP::USERAGENT
356
357 The default value for the C<User-Agent> header (the default is
358 C<Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)>).
359
360 =item $AnyEvent::HTTP::MAX_PERSISTENT
361
362 The maximum number of persistent connections to keep open (default: 8).
363
364 Not implemented currently.
365
366 =item $AnyEvent::HTTP::PERSISTENT_TIMEOUT
367
368 The maximum time to cache a persistent connection, in seconds (default: 2).
369
370 Not implemented currently.
371
372 =back
373
374 =cut
375
376 sub set_proxy($) {
377 $PROXY = [$2, $3 || 3128, $1] if $_[0] =~ m%^(https?):// ([^:/]+) (?: : (\d*) )?%ix;
378 }
379
380 # initialise proxy from environment
381 set_proxy $ENV{http_proxy};
382
383 =head1 SEE ALSO
384
385 L<AnyEvent>.
386
387 =head1 AUTHOR
388
389 Marc Lehmann <schmorp@schmorp.de>
390 http://home.schmorp.de/
391
392 =cut
393
394 1
395