ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.15 by elmex, Fri Jun 6 10:12:44 2008 UTC vs.
Revision 1.25 by root, Mon Jul 21 05:42:07 2008 UTC

3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client 3AnyEvent::HTTP - simple but non-blocking HTTP/HTTPS client
4 4
5=head1 SYNOPSIS 5=head1 SYNOPSIS
6 6
7 use AnyEvent::HTTP; 7 use AnyEvent::HTTP;
8
9 http_get "http://www.nethype.de/", sub { print $_[1] };
10
11 # ... do something else here
8 12
9=head1 DESCRIPTION 13=head1 DESCRIPTION
10 14
11This module is an L<AnyEvent> user, you need to make sure that you use and 15This module is an L<AnyEvent> user, you need to make sure that you use and
12run a supported event loop. 16run a supported event loop.
44use AnyEvent::Socket (); 48use AnyEvent::Socket ();
45use AnyEvent::Handle (); 49use AnyEvent::Handle ();
46 50
47use base Exporter::; 51use base Exporter::;
48 52
49our $VERSION = '1.1'; 53our $VERSION = '1.03';
50 54
51our @EXPORT = qw(http_get http_request); 55our @EXPORT = qw(http_get http_post http_head http_request);
52 56
53our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 57our $USERAGENT = "Mozilla/5.0 (compatible; AnyEvent::HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
54our $MAX_RECURSE = 10; 58our $MAX_RECURSE = 10;
55our $MAX_PERSISTENT = 8; 59our $MAX_PERSISTENT = 8;
56our $PERSISTENT_TIMEOUT = 2; 60our $PERSISTENT_TIMEOUT = 2;
89The callback will be called with the response data as first argument 93The callback will be called with the response data as first argument
90(or C<undef> if it wasn't available due to errors), and a hash-ref with 94(or C<undef> if it wasn't available due to errors), and a hash-ref with
91response headers as second argument. 95response headers as second argument.
92 96
93All the headers in that hash are lowercased. In addition to the response 97All the headers in that hash are lowercased. In addition to the response
94headers, the three "pseudo-headers" C<HTTPVersion>, C<Status> and 98headers, the "pseudo-headers" C<HTTPVersion>, C<Status> and C<Reason>
95C<Reason> contain the three parts of the HTTP Status-Line of the same 99contain the three parts of the HTTP Status-Line of the same name. The
100pseudo-header C<URL> contains the original URL (which can differ from the
101requested URL when following redirects).
102
96name. If the server sends a header multiple lines, then their contents 103If the server sends a header multiple lines, then their contents will be
97will be joined together with C<\x00>. 104joined together with C<\x00>.
98 105
99If an internal error occurs, such as not being able to resolve a hostname, 106If an internal error occurs, such as not being able to resolve a hostname,
100then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599> 107then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<599>
101and the C<Reason> pseudo-header will contain an error message. 108and the C<Reason> pseudo-header will contain an error message.
102 109
228 while (my ($k, $v) = each %$hdr) { 235 while (my ($k, $v) = each %$hdr) {
229 $hdr{lc $k} = $v; 236 $hdr{lc $k} = $v;
230 } 237 }
231 } 238 }
232 239
233 my $recurse = exists $arg{recurse} ? $arg{recurse} : $MAX_RECURSE; 240 my $recurse = exists $arg{recurse} ? delete $arg{recurse} : $MAX_RECURSE;
234 241
235 return $cb->(undef, { Status => 599, Reason => "recursion limit reached" }) 242 return $cb->(undef, { Status => 599, Reason => "recursion limit reached", URL => $url })
236 if $recurse < 0; 243 if $recurse < 0;
237 244
238 my $proxy = $arg{proxy} || $PROXY; 245 my $proxy = $arg{proxy} || $PROXY;
239 my $timeout = $arg{timeout} || $TIMEOUT; 246 my $timeout = $arg{timeout} || $TIMEOUT;
240 247
245 252
246 $scheme = lc $scheme; 253 $scheme = lc $scheme;
247 254
248 my $uport = $scheme eq "http" ? 80 255 my $uport = $scheme eq "http" ? 80
249 : $scheme eq "https" ? 443 256 : $scheme eq "https" ? 443
250 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported" }); 257 : return $cb->(undef, { Status => 599, Reason => "only http and https URL schemes supported", URL => $url });
251 258
252 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic 259 $hdr{referer} ||= "$scheme://$authority$upath"; # leave out fragment and query string, just a heuristic
253 260
254 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x 261 $authority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
255 or return $cb->(undef, { Status => 599, Reason => "unparsable URL" }); 262 or return $cb->(undef, { Status => 599, Reason => "unparsable URL", URL => $url });
256 263
257 my $uhost = $1; 264 my $uhost = $1;
258 $uport = $2 if defined $2; 265 $uport = $2 if defined $2;
259 266
260 $uhost =~ s/^\[(.*)\]$/$1/; 267 $uhost =~ s/^\[(.*)\]$/$1/;
305 312
306 return unless $state{connect_guard}; 313 return unless $state{connect_guard};
307 314
308 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 315 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
309 $state{fh} = shift 316 $state{fh} = shift
310 or return $cb->(undef, { Status => 599, Reason => "$!" }); 317 or return $cb->(undef, { Status => 599, Reason => "$!", URL => $url });
311 318
312 delete $state{connect_guard}; # reduce memory usage, save a tree 319 delete $state{connect_guard}; # reduce memory usage, save a tree
313 320
314 # get handle 321 # get handle
315 $state{handle} = new AnyEvent::Handle 322 $state{handle} = new AnyEvent::Handle
329 # (re-)configure handle 336 # (re-)configure handle
330 $state{handle}->timeout ($timeout); 337 $state{handle}->timeout ($timeout);
331 $state{handle}->on_error (sub { 338 $state{handle}->on_error (sub {
332 my $errno = "$!"; 339 my $errno = "$!";
333 %state = (); 340 %state = ();
334 $cb->(undef, { Status => 599, Reason => $errno }); 341 $cb->(undef, { Status => 599, Reason => $errno, URL => $url });
335 }); 342 });
336 $state{handle}->on_eof (sub { 343 $state{handle}->on_eof (sub {
337 %state = (); 344 %state = ();
338 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file" }); 345 $cb->(undef, { Status => 599, Reason => "unexpected end-of-file", URL => $url });
339 }); 346 });
340 347
341 # send request 348 # send request
342 $state{handle}->push_write ( 349 $state{handle}->push_write (
343 "$method $rpath HTTP/1.0\015\012" 350 "$method $rpath HTTP/1.0\015\012"
348 355
349 %hdr = (); # reduce memory usage, save a kitten 356 %hdr = (); # reduce memory usage, save a kitten
350 357
351 # status line 358 # status line
352 $state{handle}->push_read (line => qr/\015?\012/, sub { 359 $state{handle}->push_read (line => qr/\015?\012/, sub {
353 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) \s+ ([^\015\012]+)/ix 360 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
354 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])" })); 361 or return (%state = (), $cb->(undef, { Status => 599, Reason => "invalid server response ($_[1])", URL => $url }));
355 362
356 my %hdr = ( # response headers 363 my %hdr = ( # response headers
357 HTTPVersion => "\x00$1", 364 HTTPVersion => "\x00$1",
358 Status => "\x00$2", 365 Status => "\x00$2",
359 Reason => "\x00$3", 366 Reason => "\x00$3",
367 URL => "\x00$url"
360 ); 368 );
361 369
362 # headers, could be optimized a bit 370 # headers, could be optimized a bit
363 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub { 371 $state{handle}->unshift_read (line => qr/\015?\012\015?\012/, sub {
364 for ("$_[1]\012") { 372 for ("$_[1]\012") {
371 ((?: [^\015\012]+ | \015?\012[\011\040] )*) 379 ((?: [^\015\012]+ | \015?\012[\011\040] )*)
372 \015?\012 380 \015?\012
373 /gxc; 381 /gxc;
374 382
375 /\G$/ 383 /\G$/
376 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers" })); 384 or return (%state = (), $cb->(undef, { Status => 599, Reason => "garbled response headers", URL => $url }));
377 } 385 }
378 386
379 substr $_, 0, 1, "" 387 substr $_, 0, 1, ""
380 for values %hdr; 388 for values %hdr;
381 389
404 $arg{cookie_jar}{version} = 1; 412 $arg{cookie_jar}{version} = 1;
405 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; 413 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
406 } 414 }
407 } 415 }
408 416
409 if ($_[1]{Status} =~ /^30[12]$/ && $recurse) {
410 # microsoft and other assholes don't give a shit for following standards, 417 # microsoft and other shitheads don't give a shit for following standards,
411 # try to support a common form of broken Location header. 418 # try to support some common forms of broken Location headers.
419 if ($_[1]{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
420 $_[1]{location} =~ s/^\.\/+//;
421
412 $_[1]{location} =~ s%^/%$scheme://$uhost:$uport/%; 422 my $url = "$scheme://$uhost:$uport";
413 423
424 unless ($_[1]{location} =~ s/^\///) {
425 $url .= $upath;
426 $url =~ s/\/[^\/]*$//;
427 }
428
429 $_[1]{location} = "$url/$_[1]{location}";
430 }
431
432 if ($_[1]{Status} =~ /^30[12]$/ && $recurse && $method ne "POST") {
433 # apparently, mozilla et al. just change POST to GET here
434 # more research is needed before we do the same
414 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb); 435 http_request ($method, $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
436 } elsif ($_[1]{Status} == 303 && $recurse) {
437 # even http/1.1 is unlear on how to mutate the method
438 $method = "GET" unless $method eq "HEAD";
439 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
440 } elsif ($_[1]{Status} == 307 && $recurse && $method =~ /^(?:GET|HEAD)$/) {
441 http_request ($method => $_[1]{location}, %arg, recurse => $recurse - 1, $cb);
415 } else { 442 } else {
416 $cb->($_[0], $_[1]); 443 $cb->($_[0], $_[1]);
417 } 444 }
418 }; 445 };
419 446
458 unshift @_, "HEAD"; 485 unshift @_, "HEAD";
459 &http_request 486 &http_request
460} 487}
461 488
462sub http_post($$@) { 489sub http_post($$@) {
490 my $url = shift;
463 unshift @_, "POST", "body"; 491 unshift @_, "POST", $url, "body";
464 &http_request 492 &http_request
465} 493}
466 494
467=back 495=back
468 496
517 545
518L<AnyEvent>. 546L<AnyEvent>.
519 547
520=head1 AUTHOR 548=head1 AUTHOR
521 549
522 Marc Lehmann <schmorp@schmorp.de> 550 Marc Lehmann <schmorp@schmorp.de>
523 http://home.schmorp.de/ 551 http://home.schmorp.de/
524 552
525=cut 553=cut
526 554
5271 5551
528 556

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines