ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.10 by root, Thu Jun 5 13:06:43 2008 UTC vs.
Revision 1.19 by elmex, Mon Jun 9 13:02:13 2008 UTC

3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client 3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4 4
5=head1 SYNOPSIS 5=head1 SYNOPSIS
6 6
7 use AnyEvent::HTTP; 7 use AnyEvent::HTTP;
8
9 http_get "http://www.nethype.de/", sub { print $_[1] };
10
11 # ... do something else here
8 12
9=head1 DESCRIPTION 13=head1 DESCRIPTION
10 14
11This module is an L<AnyEvent> user, you need to make sure that you use and 15This module is an L<AnyEvent> user, you need to make sure that you use and
12run a supported event loop. 16run a supported event loop.
17
18This module implements a simple, stateless and non-blocking HTTP
19client. It supports GET, POST and other request methods, cookies and more,
20all on a very low level. It can follow redirects, supports proxies and
21automatically limits the number of connections to the values specified in
22the RFC.
23
24It should generally be a "good client" that is enough for most HTTP
25tasks. Simple tasks should be simple, but complex tasks should still be
26possible as the user retains control over request and response headers.
27
28The caller is responsible for authentication management, cookies (if
29the simplistic implementation in this module doesn't suffice), referer
30and other high-level protocol details for which this module offers only
31limited support.
13 32
14=head2 METHODS 33=head2 METHODS
15 34
16=over 4 35=over 4
17 36
29use AnyEvent::Socket (); 48use AnyEvent::Socket ();
30use AnyEvent::Handle (); 49use AnyEvent::Handle ();
31 50
32use base Exporter::; 51use base Exporter::;
33 52
34our $VERSION = '1.0'; 53our $VERSION = '1.01';
35 54
36our @EXPORT = qw(http_get http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
37 56
38our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
39our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
40our $MAX_PERSISTENT = 8; 59our $MAX_PERSISTENT = 8;
41our $PERSISTENT_TIMEOUT = 2; 60our $PERSISTENT_TIMEOUT = 2;
42our $TIMEOUT = 300; 61our $TIMEOUT = 300;
43 62
44# changing these is evil 63# changing these is evil
45our $MAX_PERSISTENT_PER_HOST = 2; 64our $MAX_PERSISTENT_PER_HOST = 2;
46our $MAX_PER_HOST = 4; # not respected yet :( 65our $MAX_PER_HOST = 4;
47 66
48our $PROXY; 67our $PROXY;
68our $ACTIVE = 0;
49 69
50my %KA_COUNT; # number of open keep-alive connections per host 70my %KA_COUNT; # number of open keep-alive connections per host
71my %CO_SLOT; # number of open connections, and wait queue, per host
51 72
52=item http_get $url, key => value..., $cb->($data, $headers) 73=item http_get $url, key => value..., $cb->($data, $headers)
53 74
54Executes an HTTP-GET request. See the http_request function for details on 75Executes an HTTP-GET request. See the http_request function for details on
55additional parameters. 76additional parameters.
105Whether to recurse requests or not, e.g. on redirects, authentication 126Whether to recurse requests or not, e.g. on redirects, authentication
106retries and so on, and how often to do so. 127retries and so on, and how often to do so.
107 128
108=item headers => hashref 129=item headers => hashref
109 130
110The request headers to use. 131The request headers to use. Currently, C<http_request> may provide its
132own C<Host:>, C<Content-Length:>, C<Connection:> and C<Cookie:> headers
133and will provide defaults for C<User-Agent:> and C<Referer:>.
111 134
112=item timeout => $seconds 135=item timeout => $seconds
113 136
114The time-out to use for various stages - each connect attempt will reset 137The time-out to use for various stages - each connect attempt will reset
115the timeout, as will read or write activity. Default timeout is 5 minutes. 138the timeout, as will read or write activity. Default timeout is 5 minutes.
165 } 188 }
166 ; 189 ;
167 190
168=cut 191=cut
169 192
193sub _slot_schedule;
194sub _slot_schedule($) {
195 my $host = shift;
196
197 while ($CO_SLOT{$host}[0] < $MAX_PER_HOST) {
198 if (my $cb = shift @{ $CO_SLOT{$host}[1] }) {
199 # somebody wants that slot
200 ++$CO_SLOT{$host}[0];
201 ++$ACTIVE;
202
203 $cb->(AnyEvent::Util::guard {
204 --$ACTIVE;
205 --$CO_SLOT{$host}[0];
206 _slot_schedule $host;
207 });
208 } else {
209 # nobody wants the slot, maybe we can forget about it
210 delete $CO_SLOT{$host} unless $CO_SLOT{$host}[0];
211 last;
212 }
213 }
214}
215
216# wait for a free slot on host, call callback
217sub _get_slot($$) {
218 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
219
220 _slot_schedule $_[0];
221}
222
170sub http_request($$$;@) { 223sub http_request($$@) {
171 my $cb = pop; 224 my $cb = pop;
172 my ($method, $url, %arg) = @_; 225 my ($method, $url, %arg) = @_;
173 226
174 my %hdr; 227 my %hdr;
175 228
181 } 234 }
182 } 235 }
183 236
184 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE; 237 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE;
185 238
186 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" }) 239 return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url })
187 if $recurse < 0; 240 if $recurse < 0;
188 241
189 my $proxy = $arg{proxy} || $PROXY; 242 my $proxy = $arg{proxy} || $PROXY;
190 my $timeout = $arg{timeout} || $TIMEOUT; 243 my $timeout = $arg{timeout} || $TIMEOUT;
191 244
196 249
197 $scheme = lc $scheme; 250 $scheme = lc $scheme;
198 251
199 my $uport = $scheme eq "http" ? 80 252 my $uport = $scheme eq "http" ? 80
200 : $scheme eq "https" ? 443 253 : $scheme eq "https" ? 443
201 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); 254 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url });
255
256 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic
202 257
203 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 258 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
204 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); 259 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url });
205 260
206 my $uhost = $1; 261 my $uhost = $1;
207 $uport = $2 if defined $2; 262 $uport = $2 if defined $2;
208 263
209 $uhost =~ s/^\[(.*)\]$/$1/; 264 $uhost =~ s/^\[(.*)\]$/$1/;
245 $hdr{host} = $uhost; 300 $hdr{host} = $uhost;
246 } 301 }
247 302
248 $hdr{"content-length"} = length $arg{body}; 303 $hdr{"content-length"} = length $arg{body};
249 304
250 my %state; 305 my %state = (connect_guard => 1);
251 306
307 _get_slot $uhost, sub {
308 $state{slot_guard} = shift;
309
310 return unless $state{connect_guard};
311
252 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 312 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
253 $state{fh} = shift 313 $state{fh} = shift
254 or return $cb->(undef, { Status => 599, Reason => "$!" }); 314 or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url });
255 315
256 delete $state{connect_guard}; # reduce memory usage, save a tree 316 delete $state{connect_guard}; # reduce memory usage, save a tree
257 317
258 # get handle 318 # get handle
259 $state{handle} = new AnyEvent::Handle 319 $state{handle} = new AnyEvent::Handle
260 fh => $state{fh}, 320 fh => $state{fh},
261 ($scheme eq "https" ? (tls => "connect") : ()); 321 ($scheme eq "https" ? (tls => "connect") : ());
262 322
263 # limit the number of persistent connections 323 # limit the number of persistent connections
264 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 324 if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
265 ++$KA_COUNT{$_[1]}; 325 ++$KA_COUNT{$_[1]};
266 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} }; 326 $state{handle}{ka_count_guard} = AnyEvent::Util::guard { --$KA_COUNT{$_[1]} };
267 $hdr{connection} = "keep-alive"; 327 $hdr{connection} = "keep-alive";
268 delete $hdr{connection}; # keep-alive not yet supported 328 delete $hdr{connection}; # keep-alive not yet supported
269 } else { 329 } else {
270 delete $hdr{connection}; 330 delete $hdr{connection};
271 } 331 }
272 332
273 # (re-)configure handle 333 # (re-)configure handle
274 $state{handle}->timeout ($timeout); 334 $state{handle}->timeout ($timeout);
275 $state{handle}->on_error (sub { 335 $state{handle}->on_error (sub {
336 my $errno = "$!";
276 %state = (); 337 %state = ();
277 $cb->(undef, { Status => 599, Reason => "$!" }); 338 $cb->(undef, { Status => 599, Reason => $errno, URL => $url });
278 }); 339 });
279 $state{handle}->on_eof (sub { 340 $state{handle}->on_eof (sub {
280 %state = (); 341 %state = ();
281 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 342 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url });
282 }); 343 });
283 344
284 # send request 345 # send request
285 $state{handle}->push_write ( 346 $state{handle}->push_write (
286 "$method $rpath HTTP/1.0\015\012" 347 "$method $rpath HTTP/1.0\015\012"
287 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr) 348 . (join "", map "$_: $hdr{$_}\015\012", keys %hdr)
288 . "\015\012" 349 . "\015\012"
289 . (delete $arg{body}) 350 . (delete $arg{body})
290 );
291
292 %hdr = (); # reduce memory usage, save a kitten
293
294 # status line
295 $state{handle}->push_read (line => qr/\015?\012/, sub {
296 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
297 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" }));
298
299 my %hdr = ( # response headers
300 HTTPVersion => "\x00$1",
301 Status => "\x00$2",
302 Reason => "\x00$3",
303 ); 351 );
304 352
353 %hdr = (); # reduce memory usage, save a kitten
354
355 # status line
356 $state{handle}->push_read (line => qr/\015?\012/, sub {
357 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix
358 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
359
360 my %hdr = ( # response headers
361 HTTPVersion => "\x00$1",
362 Status => "\x00$2",
363 Reason => "\x00$3",
364 URL => "\x00$url"
365 );
366
305 # headers, could be optimized a bit 367 # headers, could be optimized a bit
306 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 368 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
307 for ("$_[1]\012") { 369 for ("$_[1]\012") {
308 # we support spaces in field names, as lotus domino 370 # we support spaces in field names, as lotus domino
309 # creates them. 371 # creates them.
310 $hdr{lc $1} .= "\x00$2" 372 $hdr{lc $1} .= "\x00$2"
311 while /\G 373 while /\G
312 ([^:\000-\037]+): 374 ([^:\000-\037]+):
313 [\011\040]* 375 [\011\040]*
314 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 376 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
315 \015?\012 377 \015?\012
316 /gxc; 378 /gxc;
317 379
318 /\G$/ 380 /\G$/
319 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 381 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url }));
320 } 382 }
321 383
322 substr $_, 0, 1, "" 384 substr $_, 0, 1, ""
323 for values %hdr; 385 for values %hdr;
324 386
325 my $finish = sub { 387 my $finish = sub {
326 %state = (); 388 %state = ();
327 389
328 # set-cookie processing 390 # set-cookie processing
329 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) { 391 if ($arg{cookie_jar} && exists $hdr{"set-cookie"}) {
330 for (split /\x00/, $hdr{"set-cookie"}) { 392 for (split /\x00/, $hdr{"set-cookie"}) {
331 my ($cookie, @arg) = split /;\s*/; 393 my ($cookie, @arg) = split /;\s*/;
332 my ($name, $value) = split /=/, $cookie, 2; 394 my ($name, $value) = split /=/, $cookie, 2;
333 my %kv = (value => $value, map { split /=/, $_, 2 } @arg); 395 my %kv = (value => $value, map { split /=/, $_, 2 } @arg);
334 396
335 my $cdom = (delete $kv{domain}) || $uhost; 397 my $cdom = (delete $kv{domain}) || $uhost;
336 my $cpath = (delete $kv{path}) || "/"; 398 my $cpath = (delete $kv{path}) || "/";
337 399
338 $cdom =~ s/^.?/./; # make sure it starts with a "." 400 $cdom =~ s/^.?/./; # make sure it starts with a "."
339 401
402 next if $cdom =~ /\.$/;
403
404 # this is not rfc-like and not netscape-like. go figure.
340 my $ndots = $cdom =~ y/.//; 405 my $ndots = $cdom =~ y/.//;
341 next if $ndots < ($cdom =~ /[^.]{3}$/ ? 2 : 3); 406 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
342 407
343 # store it 408 # store it
344 $arg{cookie_jar}{version} = 1; 409 $arg{cookie_jar}{version} = 1;
345 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 410 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
411 }
412 }
413
414 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
415 # microsoft and other assholes don't give a shit for following standards,
416 # try to support a common form of broken Location header.
417 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
418
419 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
420 } else {
421 $cb->($_[0], $_[1]);
422 }
423 };
424
425 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
426 $finish->(undef, \%hdr);
427 } else {
428 if (exists $hdr{"content-length"}) {
429 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
430 # could cache persistent connection now
431 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
432 # but we don't, due to misdesigns, this is annoyingly complex
433 };
434
435 $finish->($_[1], \%hdr);
436 });
437 } else {
438 # too bad, need to read until we get an error or EOF,
439 # no way to detect winged data.
440 $_[0]->on_error (sub {
441 $finish->($_[0]{rbuf}, \%hdr);
442 });
443 $_[0]->on_eof (undef);
444 $_[0]->on_read (sub { });
346 } 445 }
347 } 446 }
348
349 if ($_[1]{Status} =~ /^x30[12]$/ && $recurse) {
350 # microsoft and other assholes don't give a shit for following standards,
351 # try to support a common form of broken Location header.
352 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%;
353
354 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
355 } else {
356 $cb->($_[0], $_[1]);
357 }
358 }; 447 });
359
360 if ($hdr{Status} =~ /^(?:1..|204|304)$/ or $method eq "HEAD") {
361 $finish->(undef, \%hdr);
362 } else {
363 if (exists $hdr{"content-length"}) {
364 $_[0]->unshift_read (chunk => $hdr{"content-length"}, sub {
365 # could cache persistent connection now
366 if ($hdr{connection} =~ /\bkeep-alive\b/i) {
367 # but we don't, due to misdesigns, this is annoyingly complex
368 };
369
370 $finish->($_[1], \%hdr);
371 });
372 } else {
373 # too bad, need to read until we get an error or EOF,
374 # no way to detect winged data.
375 $_[0]->on_error (sub {
376 $finish->($_[0]{rbuf}, \%hdr);
377 });
378 $_[0]->on_eof (undef);
379 $_[0]->on_read (sub { });
380 }
381 }
382 }); 448 });
449 }, sub {
450 $timeout
383 }); 451 };
384 }, sub {
385 $timeout
386 }; 452 };
387 453
388 defined wantarray && AnyEvent::Util::guard { %state = () } 454 defined wantarray && AnyEvent::Util::guard { %state = () }
389} 455}
390 456
391sub http_get($$;@) { 457sub http_get($@) {
392 unshift @_, "GET"; 458 unshift @_, "GET";
393 &http_request 459 &http_request
394} 460}
395 461
396sub http_head($$;@) { 462sub http_head($@) {
397 unshift @_, "HEAD"; 463 unshift @_, "HEAD";
398 &http_request 464 &http_request
399} 465}
400 466
401sub http_post($$$;@) { 467sub http_post($$@) {
402 unshift @_, "POST", "body"; 468 unshift @_, "POST", "body";
403 &http_request 469 &http_request
404} 470}
405 471
406=back 472=back
433 499
434The maximum time to cache a persistent connection, in seconds (default: 2). 500The maximum time to cache a persistent connection, in seconds (default: 2).
435 501
436Not implemented currently. 502Not implemented currently.
437 503
504=item $AnyEvent::HTTP::ACTIVE
505
506The number of active connections. This is not the number of currently
507running requests, but the number of currently open and non-idle TCP
508connections. This number can be useful for load-leveling.
509
438=back 510=back
439 511
440=cut 512=cut
441 513
442sub set_proxy($) { 514sub set_proxy($) {
450 522
451L<AnyEvent>. 523L<AnyEvent>.
452 524
453=head1 AUTHOR 525=head1 AUTHOR
454 526
455 Marc Lehmann <schmorp@schmorp.de> 527 Marc Lehmann <schmorp@schmorp.de>
456 http://home.schmorp.de/ 528 http://home.schmorp.de/
457 529
458=cut 530=cut
459 531
4601 5321
461 533

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines