ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.58 by root, Sun Nov 14 20:23:00 2010 UTC vs.
Revision 1.59 by root, Wed Dec 29 23:59:36 2010 UTC

43 43
44use Errno (); 44use Errno ();
45 45
46use AnyEvent 5.0 (); 46use AnyEvent 5.0 ();
47use AnyEvent::Util (); 47use AnyEvent::Util ();
48use AnyEvent::Socket ();
49use AnyEvent::Handle (); 48use AnyEvent::Handle ();
50 49
51use base Exporter::; 50use base Exporter::;
52 51
53our $VERSION = '1.46'; 52our $VERSION = '1.46';
211connect (for exmaple, to bind it on a given IP address). This parameter 210connect (for exmaple, to bind it on a given IP address). This parameter
212overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect> 211overrides the prepare callback passed to C<AnyEvent::Socket::tcp_connect>
213and behaves exactly the same way (e.g. it has to provide a 212and behaves exactly the same way (e.g. it has to provide a
214timeout). See the description for the C<$prepare_cb> argument of 213timeout). See the description for the C<$prepare_cb> argument of
215C<AnyEvent::Socket::tcp_connect> for details. 214C<AnyEvent::Socket::tcp_connect> for details.
215
216=item tcp_connect => $callback->($host, $service, $connect_cb, $prepare_cb)
217
218In even rarer cases you want total control over how AnyEvent::HTTP
219establishes connections. Normally it uses L<AnyEvent::Socket::tcp_connect>
220to do this, but you can provide your own C<tcp_connect> function -
221obviously, it has to follow the same calling conventions.
222
223There are probably lots of weird uses for this function, starting from
224tracing the hosts C<http_request> actually tries to connect, to (inexact
225but fast) host => IP address caching or even socks protocol support.
216 226
217=item on_header => $callback->($headers) 227=item on_header => $callback->($headers)
218 228
219When specified, this callback will be called with the header hash as soon 229When specified, this callback will be called with the header hash as soon
220as headers have been successfully received from the remote server (not on 230as headers have been successfully received from the remote server (not on
454 _get_slot $uhost, sub { 464 _get_slot $uhost, sub {
455 $state{slot_guard} = shift; 465 $state{slot_guard} = shift;
456 466
457 return unless $state{connect_guard}; 467 return unless $state{connect_guard};
458 468
459 $state{connect_guard} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { 469 my $tcp_connect = $arg{tcp_connect}
470 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
471
472 $state{connect_guard} = $tcp_connect->(
473 $rhost,
474 $rport,
475 sub {
460 $state{fh} = shift 476 $state{fh} = shift
461 or do { 477 or do {
462 my $err = "$!"; 478 my $err = "$!";
463 %state = (); 479 %state = ();
464 return $cb->(undef, { Status => 599, Reason => $err, @pseudo }); 480 return $cb->(undef, { Status => 599, Reason => $err, @pseudo });
465 }; 481 };
466 482
467 pop; # free memory, save a tree 483 pop; # free memory, save a tree
468 484
469 return unless delete $state{connect_guard}; 485 return unless delete $state{connect_guard};
470 486
471 # get handle 487 # get handle
472 $state{handle} = new AnyEvent::Handle 488 $state{handle} = new AnyEvent::Handle
473 fh => $state{fh}, 489 fh => $state{fh},
474 peername => $rhost, 490 peername => $rhost,
475 tls_ctx => $arg{tls_ctx}, 491 tls_ctx => $arg{tls_ctx},
476 # these need to be reconfigured on keepalive handles 492 # these need to be reconfigured on keepalive handles
477 timeout => $timeout, 493 timeout => $timeout,
478 on_error => sub { 494 on_error => sub {
479 %state = (); 495 %state = ();
480 $cb->(undef, { Status => 599, Reason => $_[2], @pseudo }); 496 $cb->(undef, { Status => 599, Reason => $_[2], @pseudo });
481 }, 497 },
482 on_eof => sub { 498 on_eof => sub {
483 %state = (); 499 %state = ();
484 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", @pseudo }); 500 $cb->(undef, { Status => 599, Reason => "Unexpected end-of-file", @pseudo });
485 }, 501 },
486 ; 502 ;
487 503
488 # limit the number of persistent connections 504 # limit the number of persistent connections
489 # keepalive not yet supported 505 # keepalive not yet supported
490# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 506# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
491# ++$KA_COUNT{$_[1]}; 507# ++$KA_COUNT{$_[1]};
492# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 508# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
493# --$KA_COUNT{$_[1]} 509# --$KA_COUNT{$_[1]}
494# }; 510# };
495# $hdr{connection} = "keep-alive"; 511# $hdr{connection} = "keep-alive";
496# } else { 512# } else {
497 delete $hdr{connection}; 513 delete $hdr{connection};
498# } 514# }
499 515
500 $state{handle}->starttls ("connect") if $rscheme eq "https"; 516 $state{handle}->starttls ("connect") if $rscheme eq "https";
501 517
502 # handle actual, non-tunneled, request 518 # handle actual, non-tunneled, request
503 my $handle_actual_request = sub { 519 my $handle_actual_request = sub {
504 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 520 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
505 521
506 # send request 522 # send request
507 $state{handle}->push_write ( 523 $state{handle}->push_write (
508 "$method $rpath HTTP/1.0\015\012" 524 "$method $rpath HTTP/1.0\015\012"
509 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) 525 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
510 . "\015\012" 526 . "\015\012"
511 . (delete $arg{body}) 527 . (delete $arg{body})
512 ); 528 );
513 529
514 # return if error occured during push_write() 530 # return if error occured during push_write()
515 return unless %state; 531 return unless %state;
516 532
517 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use 533 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
518 534
519 # status line and headers 535 # status line and headers
520 $state{handle}->push_read (line => $qr_nlnl, sub { 536 $state{handle}->push_read (line => $qr_nlnl, sub {
521 for ("$_[1]") { 537 for ("$_[1]") {
522 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 538 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
523 539
524 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )? \015?\012/igxc 540 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )? \015?\012/igxc
525 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response", @pseudo })); 541 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid server response", @pseudo }));
526 542
527 push @pseudo, 543 push @pseudo,
528 HTTPVersion => $1, 544 HTTPVersion => $1,
529 Status => $2, 545 Status => $2,
530 Reason => $3, 546 Reason => $3,
531 ; 547 ;
532 548
533 # things seen, not parsed: 549 # things seen, not parsed:
534 # p3pP="NON CUR OTPi OUR NOR UNI" 550 # p3pP="NON CUR OTPi OUR NOR UNI"
535 551
536 $hdr{lc $1} .= ",$2" 552 $hdr{lc $1} .= ",$2"
537 while /\G 553 while /\G
538 ([^:\000-\037]*): 554 ([^:\000-\037]*):
539 [\011\040]* 555 [\011\040]*
540 ((?: [^\012]+ | \012[\011\040] )*) 556 ((?: [^\012]+ | \012[\011\040] )*)
541 \012 557 \012
542 /gxc; 558 /gxc;
543 559
544 /\G$/ 560 /\G$/
545 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Garbled response headers", @pseudo })); 561 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Garbled response headers", @pseudo }));
546 }
547
548 # remove the "," prefix we added to all headers above
549 substr $_, 0, 1, ""
550 for values %hdr;
551
552 # patch in all pseudo headers
553 %hdr = (%hdr, @pseudo);
554
555 # redirect handling
556 # microsoft and other shitheads don't give a shit for following standards,
557 # try to support some common forms of broken Location headers.
558 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
559 $hdr{location} =~ s/^\.\/+//;
560
561 my $url = "$rscheme://$uhost:$uport";
562
563 unless ($hdr{location} =~ s/^\///) {
564 $url .= $upath;
565 $url =~ s/\/[^\/]*$//;
566 } 562 }
567 563
564 # remove the "," prefix we added to all headers above
565 substr $_, 0, 1, ""
566 for values %hdr;
567
568 # patch in all pseudo headers
569 %hdr = (%hdr, @pseudo);
570
571 # redirect handling
572 # microsoft and other shitheads don't give a shit for following standards,
573 # try to support some common forms of broken Location headers.
574 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
575 $hdr{location} =~ s/^\.\/+//;
576
577 my $url = "$rscheme://$uhost:$uport";
578
579 unless ($hdr{location} =~ s/^\///) {
580 $url .= $upath;
581 $url =~ s/\/[^\/]*$//;
582 }
583
568 $hdr{location} = "$url/$hdr{location}"; 584 $hdr{location} = "$url/$hdr{location}";
569 }
570
571 my $redirect;
572
573 if ($recurse) {
574 my $status = $hdr{Status};
575
576 # industry standard is to redirect POST as GET for
577 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
578 # also, the UA should ask the user for 301 and 307 and POST,
579 # industry standard seems to be to simply follow.
580 # we go with the industry standard.
581 if ($status == 301 or $status == 302 or $status == 303) {
582 # HTTP/1.1 is unclear on how to mutate the method
583 $method = "GET" unless $method eq "HEAD";
584 $redirect = 1;
585 } elsif ($status == 307) {
586 $redirect = 1;
587 } 585 }
588 }
589 586
590 my $finish = sub { 587 my $redirect;
591 $state{handle}->destroy if $state{handle};
592 %state = ();
593 588
594 # set-cookie processing 589 if ($recurse) {
595 if ($arg{cookie_jar}) { 590 my $status = $hdr{Status};
596 for ($_[1]{"set-cookie"}) {
597 # parse NAME=VALUE
598 my @kv;
599 591
600 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) { 592 # industry standard is to redirect POST as GET for
593 # 301, 302 and 303, in contrast to http/1.0 and 1.1.
594 # also, the UA should ask the user for 301 and 307 and POST,
595 # industry standard seems to be to simply follow.
596 # we go with the industry standard.
597 if ($status == 301 or $status == 302 or $status == 303) {
598 # HTTP/1.1 is unclear on how to mutate the method
599 $method = "GET" unless $method eq "HEAD";
601 my $name = $1; 600 $redirect = 1;
602 my $value = $3; 601 } elsif ($status == 307) {
603
604 unless ($value) {
605 $value = $2;
606 $value =~ s/\\(.)/$1/gs;
607 }
608
609 push @kv, $name => $value;
610
611 last unless /\G\s*;/gc;
612 }
613
614 last unless @kv;
615
616 my $name = shift @kv;
617 my %kv = (value => shift @kv, @kv);
618
619 my $cdom;
620 my $cpath = (delete $kv{path}) || "/";
621
622 if (exists $kv{domain}) {
623 $cdom = delete $kv{domain};
624
625 $cdom =~ s/^\.?/./; # make sure it starts with a "."
626
627 next if $cdom =~ /\.$/;
628
629 # this is not rfc-like and not netscape-like. go figure.
630 my $ndots = $cdom =~ y/.//;
631 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
632 } else {
633 $cdom = $uhost;
634 }
635
636 # store it 602 $redirect = 1;
637 $arg{cookie_jar}{version} = 1;
638 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
639
640 redo if /\G\s*,/gc;
641 } 603 }
642 } 604 }
643 605
606 my $finish = sub {
607 $state{handle}->destroy if $state{handle};
608 %state = ();
609
610 # set-cookie processing
611 if ($arg{cookie_jar}) {
612 for ($_[1]{"set-cookie"}) {
613 # parse NAME=VALUE
614 my @kv;
615
616 while (/\G\s* ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )/gcxs) {
617 my $name = $1;
618 my $value = $3;
619
620 unless ($value) {
621 $value = $2;
622 $value =~ s/\\(.)/$1/gs;
623 }
624
625 push @kv, $name => $value;
626
627 last unless /\G\s*;/gc;
628 }
629
630 last unless @kv;
631
632 my $name = shift @kv;
633 my %kv = (value => shift @kv, @kv);
634
635 my $cdom;
636 my $cpath = (delete $kv{path}) || "/";
637
638 if (exists $kv{domain}) {
639 $cdom = delete $kv{domain};
640
641 $cdom =~ s/^\.?/./; # make sure it starts with a "."
642
643 next if $cdom =~ /\.$/;
644
645 # this is not rfc-like and not netscape-like. go figure.
646 my $ndots = $cdom =~ y/.//;
647 next if $ndots < ($cdom =~ /\.[^.][^.]\.[^.][^.]$/ ? 3 : 2);
648 } else {
649 $cdom = $uhost;
650 }
651
652 # store it
653 $arg{cookie_jar}{version} = 1;
654 $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv;
655
656 redo if /\G\s*,/gc;
657 }
658 }
659
644 if ($redirect && exists $hdr{location}) { 660 if ($redirect && exists $hdr{location}) {
645 # we ignore any errors, as it is very common to receive 661 # we ignore any errors, as it is very common to receive
646 # Content-Length != 0 but no actual body 662 # Content-Length != 0 but no actual body
647 # we also access %hdr, as $_[1] might be an erro 663 # we also access %hdr, as $_[1] might be an erro
648 http_request ( 664 http_request (
649 $method => $hdr{location}, 665 $method => $hdr{location},
650 %arg, 666 %arg,
651 recurse => $recurse - 1, 667 recurse => $recurse - 1,
652 Redirect => \@_, 668 Redirect => \@_,
653 $cb); 669 $cb);
670 } else {
671 $cb->($_[0], $_[1]);
672 }
673 };
674
675 my $len = $hdr{"content-length"};
676
677 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
678 $finish->(undef, { Status => 598, Reason => "Request cancelled by on_header", @pseudo });
679 } elsif (
680 $hdr{Status} =~ /^(?:1..|[23]04)$/
681 or $method eq "HEAD"
682 or (defined $len && !$len)
683 ) {
684 # no body
685 $finish->("", \%hdr);
654 } else { 686 } else {
655 $cb->($_[0], $_[1]);
656 }
657 };
658
659 my $len = $hdr{"content-length"};
660
661 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
662 $finish->(undef, { Status => 598, Reason => "Request cancelled by on_header", @pseudo });
663 } elsif (
664 $hdr{Status} =~ /^(?:1..|[23]04)$/
665 or $method eq "HEAD"
666 or (defined $len && !$len)
667 ) {
668 # no body
669 $finish->("", \%hdr);
670 } else {
671 # body handling, four different code paths 687 # body handling, four different code paths
672 # for want_body_handle, on_body (2x), normal (2x) 688 # for want_body_handle, on_body (2x), normal (2x)
673 # we might read too much here, but it does not matter yet (no pers. connections) 689 # we might read too much here, but it does not matter yet (no pers. connections)
674 if (!$redirect && $arg{want_body_handle}) { 690 if (!$redirect && $arg{want_body_handle}) {
675 $_[0]->on_eof (undef); 691 $_[0]->on_eof (undef);
676 $_[0]->on_error (undef); 692 $_[0]->on_error (undef);
677 $_[0]->on_read (undef); 693 $_[0]->on_read (undef);
678 694
679 $finish->(delete $state{handle}, \%hdr); 695 $finish->(delete $state{handle}, \%hdr);
680 696
681 } elsif ($arg{on_body}) { 697 } elsif ($arg{on_body}) {
682 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) }); 698 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
683 if ($len) { 699 if ($len) {
700 $_[0]->on_eof (undef);
701 $_[0]->on_read (sub {
702 $len -= length $_[0]{rbuf};
703
704 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
705 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
706
707 $len > 0
708 or $finish->("", \%hdr);
709 });
710 } else {
711 $_[0]->on_eof (sub {
712 $finish->("", \%hdr);
713 });
714 $_[0]->on_read (sub {
715 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
716 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
717 });
718 }
719 } else {
684 $_[0]->on_eof (undef); 720 $_[0]->on_eof (undef);
685 $_[0]->on_read (sub {
686 $len -= length $_[0]{rbuf};
687 721
688 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
689 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
690
691 $len > 0
692 or $finish->("", \%hdr);
693 });
694 } else {
695 $_[0]->on_eof (sub {
696 $finish->("", \%hdr);
697 });
698 $_[0]->on_read (sub {
699 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
700 or $finish->(undef, { Status => 598, Reason => "Request cancelled by on_body", @pseudo });
701 });
702 }
703 } else {
704 $_[0]->on_eof (undef);
705
706 if ($len) { 722 if ($len) {
707 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) }); 723 $_[0]->on_error (sub { $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }) });
708 $_[0]->on_read (sub { 724 $_[0]->on_read (sub {
709 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr) 725 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), \%hdr)
710 if $len <= length $_[0]{rbuf}; 726 if $len <= length $_[0]{rbuf};
711 }); 727 });
712 } else { 728 } else {
713 $_[0]->on_error (sub { 729 $_[0]->on_error (sub {
714 ($! == Errno::EPIPE || !$!) 730 ($! == Errno::EPIPE || !$!)
715 ? $finish->(delete $_[0]{rbuf}, \%hdr) 731 ? $finish->(delete $_[0]{rbuf}, \%hdr)
716 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo }); 732 : $finish->(undef, { Status => 599, Reason => $_[2], @pseudo });
717 }); 733 });
718 $_[0]->on_read (sub { }); 734 $_[0]->on_read (sub { });
735 }
719 } 736 }
720 } 737 }
721 } 738 });
722 }); 739 };
723 };
724 740
725 # now handle proxy-CONNECT method 741 # now handle proxy-CONNECT method
726 if ($proxy && $uscheme eq "https") { 742 if ($proxy && $uscheme eq "https") {
727 # oh dear, we have to wrap it into a connect request 743 # oh dear, we have to wrap it into a connect request
728 744
729 # maybe re-use $uauthority with patched port? 745 # maybe re-use $uauthority with patched port?
730 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); 746 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012");
731 $state{handle}->push_read (line => $qr_nlnl, sub { 747 $state{handle}->push_read (line => $qr_nlnl, sub {
732 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 748 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
733 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", @pseudo })); 749 or return (%state = (), $cb->(undef, { Status => 599, Reason => "Invalid proxy connect response ($_[1])", @pseudo }));
734 750
735 if ($2 == 200) { 751 if ($2 == 200) {
736 $rpath = $upath; 752 $rpath = $upath;
737 &$handle_actual_request; 753 &$handle_actual_request;
738 } else { 754 } else {
739 %state = (); 755 %state = ();
740 $cb->(undef, { Status => $2, Reason => $3, @pseudo }); 756 $cb->(undef, { Status => $2, Reason => $3, @pseudo });
757 }
741 } 758 });
742 });
743 } else { 759 } else {
744 &$handle_actual_request; 760 &$handle_actual_request;
761 }
762
745 } 763 },
746
747 }, $arg{on_prepare} || sub { $timeout }; 764 $arg{on_prepare} || sub { $timeout }
765 );
748 }; 766 };
749 767
750 defined wantarray && AnyEvent::Util::guard { %state = () } 768 defined wantarray && AnyEvent::Util::guard { %state = () }
751} 769}
752 770

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines