ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.10 by root, Thu Jun 5 13:06:43 2008 UTC vs.
Revision 1.17 by root, Fri Jun 6 13:02:38 2008 UTC

3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client 3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4 4
5=head1 SYNOPSIS 5=head1 SYNOPSIS
6 6
7 use AnyEvent::HTTP; 7 use AnyEvent::HTTP;
8
9 http_get "http://www.nethype.de/", sub { print $_[1] };
10
11 # ... do something else here
8 12
9=head1 DESCRIPTION 13=head1 DESCRIPTION
10 14
11This module is an L<AnyEvent> user; you need to make sure that you use and 15This module is an L<AnyEvent> user; you need to make sure that you use and
12run a supported event loop. 16run a supported event loop.
17
18This module implements a simple, stateless and non-blocking HTTP
19client. It supports GET, POST and other request methods, cookies and more,
20all on a very low level. It can follow redirects, supports proxies and
21automatically limits the number of connections to the values specified in
22the RFC.
23
24It should generally be a "good client" that is enough for most HTTP
25tasks. Simple tasks should be simple, but complex tasks should still be
26possible as the user retains control over request and response headers.
27
28The caller is responsible for authentication management, cookies (if
29the simplistic implementation in this module doesn't suffice), referer
30and other high-level protocol details for which this module offers only
31limited support.
13 32
14=head2 METHODS 33=head2 METHODS
15 34
16=over 4 35=over 4
17 36
29use AnyEvent::Socket (); 48use AnyEvent::Socket ();
30use AnyEvent::Handle (); 49use AnyEvent::Handle ();
31 50
32use base Exporter::; 51use base Exporter::;
33 52
34our $VERSION = '1.0'; 53our $VERSION = '1.01';
35 54
36our @EXPORT = qw(http_get http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
37 56
38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 59our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 2; 60our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 300; 61our $TIMEOUT = 300;
43 62
44# changing these is evil 63# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 64our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 65our $MAX_PER_HOST = 4;
47 66
48our $PROXY; 67our $PROXY;
68our $ACTIVE = 0;
49 69
50my %KA_COUNT; # number of open keep-alive connections per host 70my %KA_COUNT; # number of open keep-alive connections per host
71my %CO_SLOT; # number of open connections, and wait queue, per host
51 72
52=item http_get $url, key => value..., $cb->($data, $headers) 73=item http_get $url, key => value..., $cb->($data, $headers)
53 74
54Executes an HTTP-GET request. See the http_request function for details on 75Executes an HTTP-GET request. See the http_request function for details on
55additional parameters. 76additional parameters.
105Whether to recurse requests or not, e.g. on redirects, authentication 126Whether to recurse requests or not, e.g. on redirects, authentication
106retries and so on, and how often to do so. 127retries and so on, and how often to do so.
107 128
108=item headers => hashref 129=item headers => hashref
109 130
110The request headers to use. 131The request headers to use. Currently, C<http_request> may provide its
132own C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers
133and will provide defaults for C<User-Agent:> and C<Referer:>.
111 134
112=item timeout => $seconds 135=item timeout => $seconds
113 136
114The time-out to use for various stages - each connect attempt will reset 137The time-out to use for various stages - each connect attempt will reset
115the timeout, as will read or write activity. Default timeout is 5 minutes. 138the timeout, as will read or write activity. Default timeout is 5 minutes.
165 } 188 }
166 ; 189 ;
167 190
168=cut 191=cut
169 192
193sub _slot_schedule;
194sub _slot_schedule($) {
195 my $host = shift;
196
197 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
198 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
199 # somebody wants that slot
200 ++$CO_SLOT{$host}[0];
201 ++$ACTIVE;
202
203 $cb->(AnyEvent::Util::guard {
204 --$ACTIVE;
205 --$CO_SLOT{$host}[0];
206 _slot_schedule $host;
207 });
208 } else {
209 # nobody wants the slot, maybe we can forget about it
210 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
211 last;
212 }
213 }
214}
215
216# wait for a free slot on host, call callback
217sub _get_slot($$) {
218 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
219
220 _slot_schedule $_[0];
221}
222
170sub http_request($$$;@) { 223sub http_request($$@) {
171 my $cb = pop; 224 my $cb = pop;
172 my ($method, $url, %arg) = @_; 225 my ($method, $url, %arg) = @_;
173 226
174 my %hdr; 227 my %hdr;
175 228
197 $scheme = lc $scheme; 250 $scheme = lc $scheme;
198 251
199 my $uport = $scheme eq "http" ? 80 252 my $uport = $scheme eq "http" ? 80
200 : $scheme eq "https" ? 443 253 : $scheme eq "https" ? 443
201 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); 254 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" });
255
256 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic
202 257
203 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 258 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
204 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); 259 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" });
205 260
206 my $uhost = $1; 261 my $uhost = $1;
245 $hdr{host} = $uhost; 300 $hdr{host} = $uhost;
246 } 301 }
247 302
248 $hdr{"content-length"} = length $arg{body}; 303 $hdr{"content-length"} = length $arg{body};
249 304
250 my %state; 305 my %state = (connect_guard => 1);
251 306
307 _get_slot $uhost, sub {
308 $state{slot_guard} = shift;
309
310 return unless $state{connect_guard};
311
252 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 312 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
253 $state{fh} = shift 313 $state{fh} = shift
254 or return $cb->(undef, { Status => 599, Reason => "$!" }); 314 or return $cb->(undef, { Status => 599, Reason => "$!" });
255 315
256 delete $state{connect_guard}; # reduce memory usage, save a tree 316 delete $state{connect_guard}; # reduce memory usage, save a tree
257 317
258 # get handle 318 # get handle
259 $state{handle} = new AnyEvent::Handle 319 $state{handle} = new AnyEvent::Handle
260 fh => $state{fh}, 320 fh => $state{fh},
261 ($scheme eq "https" ? (tls => "connect") : ()); 321 ($scheme eq "https" ? (tls => "connect") : ());
262 322
263 # limit the number of persistent connections 323 # limit the number of persistent connections
264 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 324 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
265 ++$KA_COUNT{$_[1]}; 325 ++$KA_COUNT{$_[1]};
266 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 326 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
267 $hdr{connection} = "keep-alive"; 327 $hdr{connection} = "keep-alive";
268 delete $hdr{connection}; # keep-alive not yet supported 328 delete $hdr{connection}; # keep-alive not yet supported
269 } else { 329 } else {
270 delete $hdr{connection}; 330 delete $hdr{connection};
271 } 331 }
272 332
273 # (re-)configure handle 333 # (re-)configure handle
274 $state{handle}->timeout ($timeout); 334 $state{handle}->timeout ($timeout);
275 $state{handle}->on_error (sub { 335 $state{handle}->on_error (sub {
336 my $errno = "$!";
276 %state = (); 337 %state = ();
277 $cb->(undef, { Status => 599, Reason => "$!" }); 338 $cb->(undef, { Status => 599, Reason => $errno });
278 }); 339 });
279 $state{handle}->on_eof (sub { 340 $state{handle}->on_eof (sub {
280 %state = (); 341 %state = ();
281 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 342 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" });
282 }); 343 });
283 344
284 # send request 345 # send request
285 $state{handle}->push_write ( 346 $state{handle}->push_write (
286 "$method $rpath HTTP/1.0\015\012" 347 "$method $rpath HTTP/1.0\015\012"
287 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 348 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
288 . "\015\012" 349 . "\015\012"
289 . (delete $arg{body}) 350 . (delete $arg{body})
290 );
291
292 %hdr = (); # reduce memory usage, save a kitten
293
294 # status line
295 $state{handle}->push_read (line => qr/\015?\012/, sub {
296 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
297 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
298
299 my %hdr = ( # response headers
300 HTTPVersion => "\x00$1",
301 Status => "\x00$2",
302 Reason => "\x00$3",
303 ); 351 );
304 352
353 %hdr = (); # reduce memory usage, save a kitten
354
355 # status line
356 $state{handle}->push_read (line => qr/\015?\012/, sub {
357 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
358 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
359
360 my %hdr = ( # response headers
361 HTTPVersion => "\x00$1",
362 Status => "\x00$2",
363 Reason => "\x00$3",
364 );
365
305 # headers, could be optimized a bit 366 # headers, could be optimized a bit
306 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 367 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
307 for ("$_[1]\012") { 368 for ("$_[1]\012") {
308 # we support spaces in field names, as lotus domino 369 # we support spaces in field names, as lotus domino
309 # creates them. 370 # creates them.
310 $hdr{lc $1} .= "\x00$2" 371 $hdr{lc $1} .= "\x00$2"
311 while /\G 372 while /\G
312 ([^:\000-\037]+): 373 ([^:\000-\037]+):
313 [\011\040]* 374 [\011\040]*
314 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 375 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
315 \015?\012 376 \015?\012
316 /gxc; 377 /gxc;
317 378
318 /\G$/ 379 /\G$/
319 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 380 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" }));
320 } 381 }
321 382
322 substr $_, 0, 1, "" 383 substr $_, 0, 1, ""
323 for values %hdr; 384 for values %hdr;
324 385
325 my $finish = sub { 386 my $finish = sub {
326 %state = (); 387 %state = ();
327 388
328 # set-cookie processing 389 # set-cookie processing
329 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) { 390 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) {
330 for (split /\x00/, $hdr{"set-cookie"}) { 391 for (split /\x00/, $hdr{"set-cookie"}) {
331 my ($cookie, @arg) = split /;\s*/; 392 my ($cookie, @arg) = split /;\s*/;
332 my ($name, $value) = split /=/, $cookie, 2; 393 my ($name, $value) = split /=/, $cookie, 2;
333 my %kv = (value => $value, map { split /=/, $_, 2 } @arg); 394 my %kv = (value => $value, map { split /=/, $_, 2 } @arg);
334 395
335 my $cdom = (delete $kv{domain}) || $uhost; 396 my $cdom = (delete $kv{domain}) || $uhost;
336 my $cpath = (delete $kv{path}) || "/"; 397 my $cpath = (delete $kv{path}) || "/";
337 398
338 $cdom =~ s/^.?/./; # make sure it starts with a "." 399 $cdom =~ s/^.?/./; # make sure it starts with a "."
339 400
401 next if $cdom =~ /\.$/;
402
403 # this is not rfc-like and not netscape-like. go figure.
340 my $ndots = $cdom =~ y/.//; 404 my $ndots = $cdom =~ y/.//;
341 next if $ndots < ($cdom =~ /[^.]{3}$/ ? 2 : 3); 405 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
342 406
343 # store it 407 # store it
344 $arg{cookie_jar}{version} = 1; 408 $arg{cookie_jar}{version} = 1;
345 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 409 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
410 }
411 }
412
413 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
414 # microsoft and other assholes don't give a shit for following standards,
415 # try to support a common form of broken Location header.
416 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
417
418 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
419 } else {
420 $cb->($_[0], $_[1]);
421 }
422 };
423
424 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
425 $finish->(undef, \%hdr);
426 } else {
427 if (exists $hdr{"content-length"}) {
428 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
429 # could cache persistent connection now
430 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
431 # but we don't, due to misdesigns, this is annoyingly complex
432 };
433
434 $finish->($_[1], \%hdr);
435 });
436 } else {
437 # too bad, need to read until we get an error or EOF,
438 # no way to detect winged data.
439 $_[0]->on_error (sub {
440 $finish->($_[0]{rbuf}, \%hdr);
441 });
442 $_[0]->on_eof (undef);
443 $_[0]->on_read (sub { });
346 } 444 }
347 } 445 }
348
349 if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) {
350 # microsoft and other assholes don't give a shit for following standards,
351 # try to support a common form of broken Location header.
352 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
353
354 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
355 } else {
356 $cb->($_[0], $_[1]);
357 }
358 }; 446 });
359
360 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
361 $finish->(undef, \%hdr);
362 } else {
363 if (exists $hdr{"content-length"}) {
364 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
365 # could cache persistent connection now
366 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
367 # but we don't, due to misdesigns, this is annoyingly complex
368 };
369
370 $finish->($_[1], \%hdr);
371 });
372 } else {
373 # too bad, need to read until we get an error or EOF,
374 # no way to detect winged data.
375 $_[0]->on_error (sub {
376 $finish->($_[0]{rbuf}, \%hdr);
377 });
378 $_[0]->on_eof (undef);
379 $_[0]->on_read (sub { });
380 }
381 }
382 }); 447 });
448 }, sub {
449 $timeout
383 }); 450 };
384 }, sub {
385 $timeout
386 }; 451 };
387 452
388 defined wantarray && AnyEvent::Util::guard { %state = () } 453 defined wantarray && AnyEvent::Util::guard { %state = () }
389} 454}
390 455
391sub http_get($$;@) { 456sub http_get($@) {
392 unshift @_, "GET"; 457 unshift @_, "GET";
393 &http_request 458 &http_request
394} 459}
395 460
396sub http_head($$;@) { 461sub http_head($@) {
397 unshift @_, "HEAD"; 462 unshift @_, "HEAD";
398 &http_request 463 &http_request
399} 464}
400 465
401sub http_post($$$;@) { 466sub http_post($$@) {
402 unshift @_, "POST", "body"; 467 unshift @_, "POST", "body";
403 &http_request 468 &http_request
404} 469}
405 470
406=back 471=back
433 498
434The maximum time to cache a persistent connection, in seconds (default: 2). 499The maximum time to cache a persistent connection, in seconds (default: 2).
435 500
436Not implemented currently. 501Not implemented currently.
437 502
503=item $AnyEvent::HTTP::ACTIVE
504
505The number of active connections. This is not the number of currently
506running requests, but the number of currently open and non-idle TCP
507connections. This number can be useful for load-leveling.
508
438=back 509=back
439 510
440=cut 511=cut
441 512
442sub set_proxy($) { 513sub set_proxy($) {

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines