ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.56 by root, Mon Sep 6 05:30:54 2010 UTC vs.
Revision 1.59 by root, Wed Dec 29 23:59:36 2010 UTC

43 43
44use Errno (); 44use Errno ();
45 45
46use AnyEvent 5.0 (); 46use AnyEvent 5.0 ();
47use AnyEvent::Util (); 47use AnyEvent::Util ();
48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 48use AnyEvent::Handle ();
50 49
51use base Exporter::; 50use base Exporter::;
52 51
53our $VERSION = '1.45'; 52our $VERSION = '1.46';
54 53
55our @EXPORT = qw(http_get http_post http_head http_request); 54our @EXPORT = qw(http_get http_post http_head http_request);
56 55
57our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)"; 56our $USERAGENT = "Mozilla/5.0 (compatible; U; AnyEvent-HTTP/$VERSION; +http://software.schmorp.de/pkg/AnyEvent)";
58our $MAX_RECURSE = 10; 57our $MAX_RECURSE = 10;
92must be an absolute http or https URL. 91must be an absolute http or https URL.
93 92
94When called in void context, nothing is returned. In other contexts, 93When called in void context, nothing is returned. In other contexts,
95C<http_request> returns a "cancellation guard" - you have to keep the 94C<http_request> returns a "cancellation guard" - you have to keep the
96object at least alive until the callback get called. If the object gets 95object at least alive until the callback get called. If the object gets
97destroyed before the callbakc is called, the request will be cancelled. 96destroyed before the callback is called, the request will be cancelled.
98 97
99The callback will be called with the response body data as first argument 98The callback will be called with the response body data as first argument
100(or C<undef> if an error occured), and a hash-ref with response headers as 99(or C<undef> if an error occured), and a hash-ref with response headers as
101second argument. 100second argument.
102 101
211connect (for exmaple, to bind it on a given IP address). This parameter 210connect (for exmaple, to bind it on a given IP address). This parameter
212overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect> 211overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect>
213and behaves exactly the same way (e.g. it has to provide a 212and behaves exactly the same way (e.g. it has to provide a
214timeout). See the description for the C<$prepare_cb> argument of 213timeout). See the description for the C<$prepare_cb> argument of
215C<AnyEvent::Socket::tcp_connect> for details. 214C<AnyEvent::Socket::tcp_connect> for details.
215
216=item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
217
218In even rarer cases you want total control over how AnyEvent::HTTP
219establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect>
220to do this, but you can provide your own C<tcp_connect> function -
221obviously, it has to follow the same calling conventions.
222
223There are probably lots of weird uses for this function, starting from
224tracing the hosts C<http_request> actually tries to connect, to (inexact
225but fast) host => IP address caching or even socks protocol support.
216 226
217=item on_header => $callback->($headers) 227=item on_header => $callback->($headers)
218 228
219When specified, this callback will be called with the header hash as soon 229When specified, this callback will be called with the header hash as soon
220as headers have been successfully received from the remote server (not on 230as headers have been successfully received from the remote server (not on
338 push @{ $CO_SLOT{$_[0]}[1] }, $_[1]; 348 push @{ $CO_SLOT{$_[0]}[1] }, $_[1];
339 349
340 _slot_schedule $_[0]; 350 _slot_schedule $_[0];
341} 351}
342 352
343our $qr_nl = qr{\015?\012};
344our $qr_nlnl = qr{(?<![^\012])\015?\012}; 353our $qr_nlnl = qr{(?<![^\012])\015?\012};
345 354
346our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 }; 355our $TLS_CTX_LOW = { cache => 1, sslv2 => 1 };
347our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" }; 356our $TLS_CTX_HIGH = { cache => 1, verify => 1, verify_peername => "https" };
348 357
455 _get_slot $uhost, sub { 464 _get_slot $uhost, sub {
456 $state{slot_guard} = shift; 465 $state{slot_guard} = shift;
457 466
458 return unless $state{connect_guard}; 467 return unless $state{connect_guard};
459 468
460 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 469 my $tcp_connect = $arg{tcp_connect}
470 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
471
472 $state{connect_guard} = $tcp_connect->(
473 $rhost,
474 $rport,
475 sub {
461 $state{fh} = shift 476 $state{fh} = shift
462 or do { 477 or do {
463 my $err = "$!"; 478 my $err = "$!";
464 %state = (); 479 %state = ();
465 return $cb->(undef, { Status => 599, Reason => $err, @pseudo }); 480 return $cb->(undef, { Status => 599, Reason => $err, @pseudo });
466 }; 481 };
467 482
468 pop; # free memory, save a tree 483 pop; # free memory, save a tree
469 484
470 return unless delete $state{connect_guard}; 485 return unless delete $state{connect_guard};
471 486
472 # get handle 487 # get handle
473 $state{handle} = new AnyEvent::Handle 488 $state{handle} = new AnyEvent::Handle
474 fh => $state{fh}, 489 fh => $state{fh},
475 peername => $rhost, 490 peername => $rhost,
476 tls_ctx => $arg{tls_ctx}, 491 tls_ctx => $arg{tls_ctx},
477 # these need to be reconfigured on keepalive handles 492 # these need to be reconfigured on keepalive handles
478 timeout => $timeout, 493 timeout => $timeout,
479 on_error => sub { 494 on_error => sub {
480 %state = (); 495 %state = ();
481 $cb->(undef, { Status => 599, Reason => $_[2], @pseudo }); 496 $cb->(undef, { Status => 599, Reason => $_[2], @pseudo });
482 }, 497 },
483 on_eof => sub { 498 on_eof => sub {
484 %state = (); 499 %state = ();
485 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", @pseudo }); 500 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", @pseudo });
486 }, 501 },
487 ; 502 ;
488 503
489 # limit the number of persistent connections 504 # limit the number of persistent connections
490 # keepalive not yet supported 505 # keepalive not yet supported
491# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 506# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
492# ++$KA_COUNT{$_[1]}; 507# ++$KA_COUNT{$_[1]};
493# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 508# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
494# --$KA_COUNT{$_[1]} 509# --$KA_COUNT{$_[1]}
495# }; 510# };
496# $hdr{connection} = "keep-alive"; 511# $hdr{connection} = "keep-alive";
497# } else { 512# } else {
498 delete $hdr{connection}; 513 delete $hdr{connection};
499# } 514# }
500 515
501 $state{handle}->starttls ("connect") if $rscheme eq "https"; 516 $state{handle}->starttls ("connect") if $rscheme eq "https";
502 517
503 # handle actual, non-tunneled, request 518 # handle actual, non-tunneled, request
504 my $handle_actual_request = sub { 519 my $handle_actual_request = sub {
505 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 520 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
506 521
507 # send request 522 # send request
508 $state{handle}->push_write ( 523 $state{handle}->push_write (
509 "$method $rpath HTTP/1.0\015\012" 524 "$method $rpath HTTP/1.0\015\012"
510 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) 525 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
511 . "\015\012" 526 . "\015\012"
512 . (delete $arg{body}) 527 . (delete $arg{body})
513 ); 528 );
514 529
515 # return if error occured during push_write() 530 # return if error occured during push_write()
516 return unless %state; 531 return unless %state;
517 532
518 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use 533 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
519 534
520 # status line 535 # status line and headers
521 $state{handle}->push_read (line => $qr_nl, sub {
522 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
523 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response ($_[1])", @pseudo }));
524
525 push @pseudo,
526 HTTPVersion => $1,
527 Status => $2,
528 Reason => $3,
529 ;
530
531 # headers, could be optimized a bit
532 $state{handle}->unshift_read (line => $qr_nlnl, sub { 536 $state{handle}->push_read (line => $qr_nlnl, sub {
533 for ("$_[1]") { 537 for ("$_[1]") {
534 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 538 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
539
540 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )? \015?\012/igxc
541 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response", @pseudo }));
542
543 push @pseudo,
544 HTTPVersion => $1,
545 Status => $2,
546 Reason => $3,
547 ;
535 548
536 # things seen, not parsed: 549 # things seen, not parsed:
537 # p3pP="NON CUR OTPi OUR NOR UNI" 550 # p3pP="NON CUR OTPi OUR NOR UNI"
538 551
539 $hdr{lc $1} .= ",$2" 552 $hdr{lc $1} .= ",$2"
574 my $redirect; 587 my $redirect;
575 588
576 if ($recurse) { 589 if ($recurse) {
577 my $status = $hdr{Status}; 590 my $status = $hdr{Status};
578 591
579 if (($status == 301 || $status == 302) && $method ne "POST") { 592 # industry standard is to redirect POST as GET for
580 # apparently, mozilla et al. just change POST to GET here 593 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
581 # more research is needed before we do the same 594 # also, the UA should ask the user for 301 and 307 and POST,
582 $redirect = 1; 595 # industry standard seems to be to simply follow.
583 } elsif ($status == 303) { 596 # we go with the industry standard.
597 if ($status == 301 or $status == 302 or $status == 303) {
584 # even http/1.1 is unclear on how to mutate the method 598 # HTTP/1.1 is unclear on how to mutate the method
585 $method = "GET" unless $method eq "HEAD"; 599 $method = "GET" unless $method eq "HEAD";
586 $redirect = 1; 600 $redirect = 1;
587 } elsif ($status == 307 && $method =~ /^(?:GET|HEAD)$/) { 601 } elsif ($status == 307) {
588 $redirect = 1; 602 $redirect = 1;
589 } 603 }
590 } 604 }
591 605
592 my $finish = sub { 606 my $finish = sub {
672 } else { 686 } else {
673 # body handling, four different code paths 687 # body handling, four different code paths
674 # for want_body_handle, on_body (2x), normal (2x) 688 # for want_body_handle, on_body (2x), normal (2x)
675 # we might read too much here, but it does not matter yet (no pers. connections) 689 # we might read too much here, but it does not matter yet (no pers. connections)
676 if (!$redirect && $arg{want_body_handle}) { 690 if (!$redirect && $arg{want_body_handle}) {
677 $_[0]->on_eof (undef); 691 $_[0]->on_eof (undef);
678 $_[0]->on_error (undef); 692 $_[0]->on_error (undef);
679 $_[0]->on_read (undef); 693 $_[0]->on_read (undef);
680 694
681 $finish->(delete $state{handle}, \%hdr); 695 $finish->(delete $state{handle}, \%hdr);
682 696
711 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr) 725 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr)
712 if $len <= length $_[0]{rbuf}; 726 if $len <= length $_[0]{rbuf};
713 }); 727 });
714 } else { 728 } else {
715 $_[0]->on_error (sub { 729 $_[0]->on_error (sub {
716 $! == Errno::EPIPE || !$! 730 ($! == Errno::EPIPE || !$!)
717 ? $finish->(delete $_[0]{rbuf}, \%hdr) 731 ? $finish->(delete $_[0]{rbuf}, \%hdr)
718 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }); 732 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo });
719 }); 733 });
720 $_[0]->on_read (sub { }); 734 $_[0]->on_read (sub { });
721 } 735 }
722 } 736 }
723 } 737 }
724 }); 738 });
725 }); 739 };
726 };
727 740
728 # now handle proxy-CONNECT method 741 # now handle proxy-CONNECT method
729 if ($proxy && $uscheme eq "https") { 742 if ($proxy && $uscheme eq "https") {
730 # oh dear, we have to wrap it into a connect request 743 # oh dear, we have to wrap it into a connect request
731 744
732 # maybe re-use $uauthority with patched port? 745 # maybe re-use $uauthority with patched port?
733 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 746 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
734 $state{handle}->push_read (line => $qr_nlnl, sub { 747 $state{handle}->push_read (line => $qr_nlnl, sub {
735 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 748 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
736 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", @pseudo })); 749 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", @pseudo }));
737 750
738 if ($2 == 200) { 751 if ($2 == 200) {
739 $rpath = $upath; 752 $rpath = $upath;
740 &$handle_actual_request; 753 &$handle_actual_request;
741 } else { 754 } else {
742 %state = (); 755 %state = ();
743 $cb->(undef, { Status => $2, Reason => $3, @pseudo }); 756 $cb->(undef, { Status => $2, Reason => $3, @pseudo });
757 }
744 } 758 });
745 });
746 } else { 759 } else {
747 &$handle_actual_request; 760 &$handle_actual_request;
761 }
762
748 } 763 },
749
750 }, $arg{on_prepare} || sub { $timeout }; 764 $arg{on_prepare} || sub { $timeout }
765 );
751 }; 766 };
752 767
753 defined wantarray && AnyEvent::Util::guard { %state = () } 768 defined wantarray && AnyEvent::Util::guard { %state = () }
754} 769}
755 770

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines