ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.86 by root, Sun Jan 2 06:38:32 2011 UTC vs.
Revision 1.89 by root, Mon Jan 3 00:23:25 2011 UTC

57our $MAX_PERSISTENT = 8; 57our $MAX_PERSISTENT = 8;
58our $PERSISTENT_TIMEOUT = 2; 58our $PERSISTENT_TIMEOUT = 2;
59our $TIMEOUT = 300; 59our $TIMEOUT = 300;
60 60
61# changing these is evil 61# changing these is evil
62our $MAX_PERSISTENT_PER_HOST = 0; 62our $MAX_PERSISTENT_PER_HOST = 2;
63our $MAX_PER_HOST = 4; 63our $MAX_PER_HOST = 4;
64 64
65our $PROXY; 65our $PROXY;
66our $ACTIVE = 0; 66our $ACTIVE = 0;
67 67
649 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; 649 $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"};
650 650
651 $hdr{"content-length"} = length $arg{body} 651 $hdr{"content-length"} = length $arg{body}
652 if length $arg{body} || $method ne "GET"; 652 if length $arg{body} || $method ne "GET";
653 653
654 $hdr{connection} = "close TE"; #1.1 654 my $idempotent = $method =~ /^(?:GET|HEAD|PUT|DELETE|OPTIONS|TRACE)$/;
655
656 # default value for keepalive is true iff the request is for an idempotent method
657 my $keepalive = exists $arg{keepalive}
658 ? $arg{keepalive}*1
659 : $idempotent ? $PERSISTENT_TIMEOUT : 0;
660
661 $hdr{connection} = ($keepalive ? "" : "close ") . "Te"; #1.1
655 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 662 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
656 663
657 my %state = (connect_guard => 1); 664 my %state = (connect_guard => 1);
658 665
659 _get_slot $uhost, sub {
660 $state{slot_guard} = shift;
661
662 return unless $state{connect_guard};
663
664 my $ae_error = 595; # connecting 666 my $ae_error = 595; # connecting
665 667
666 # handle actual, non-tunneled, request 668 # handle actual, non-tunneled, request
667 my $handle_actual_request = sub { 669 my $handle_actual_request = sub {
668 $ae_error = 596; # request phase 670 $ae_error = 596; # request phase
669 671
670 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 672 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
671 673
672 # send request 674 # send request
673 $state{handle}->push_write ( 675 $state{handle}->push_write (
674 "$method $rpath HTTP/1.1\015\012" 676 "$method $rpath HTTP/1.1\015\012"
675 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) 677 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
676 . "\015\012" 678 . "\015\012"
677 . (delete $arg{body}) 679 . (delete $arg{body})
678 ); 680 );
679 681
680 # return if error occured during push_write() 682 # return if error occured during push_write()
681 return unless %state; 683 return unless %state;
682 684
683 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use 685 # reduce memory usage, save a kitten, also re-use it for the response headers.
686 %hdr = ();
684 687
685 # status line and headers 688 # status line and headers
686 $state{read_response} = sub { 689 $state{read_response} = sub {
687 for ("$_[1]") { 690 for ("$_[1]") {
688 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 691 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
689 692
690 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci 693 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
691 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); 694 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
692 695
693 # 100 Continue handling 696 # 100 Continue handling
694 # should not happen as we don't send expect: 100-continue, 697 # should not happen as we don't send expect: 100-continue,
695 # but we handle it just in case. 698 # but we handle it just in case.
696 # since we send the request body regardless, if we get an error 699 # since we send the request body regardless, if we get an error
697 # we are out of-sync, which we currently do NOT handle correctly. 700 # we are out of-sync, which we currently do NOT handle correctly.
698 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response}) 701 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
699 if $2 eq 100; 702 if $2 eq 100;
700 703
701 push @pseudo, 704 push @pseudo,
702 HTTPVersion => $1, 705 HTTPVersion => $1,
703 Status => $2, 706 Status => $2,
704 Reason => $3, 707 Reason => $3,
705 ; 708 ;
706 709
707 my $hdr = parse_hdr 710 my $hdr = parse_hdr
708 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" })); 711 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
709 712
710 %hdr = (%$hdr, @pseudo); 713 %hdr = (%$hdr, @pseudo);
714 }
715
716 # redirect handling
717 # microsoft and other shitheads don't give a shit for following standards,
718 # try to support some common forms of broken Location headers.
719 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
720 $hdr{location} =~ s/^\.\/+//;
721
722 my $url = "$rscheme://$uhost:$uport";
723
724 unless ($hdr{location} =~ s/^\///) {
725 $url .= $upath;
726 $url =~ s/\/[^\/]*$//;
711 } 727 }
712 728
713 # redirect handling
714 # microsoft and other shitheads don't give a shit for following standards,
715 # try to support some common forms of broken Location headers.
716 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
717 $hdr{location} =~ s/^\.\/+//;
718
719 my $url = "$rscheme://$uhost:$uport";
720
721 unless ($hdr{location} =~ s/^\///) {
722 $url .= $upath;
723 $url =~ s/\/[^\/]*$//;
724 }
725
726 $hdr{location} = "$url/$hdr{location}"; 729 $hdr{location} = "$url/$hdr{location}";
730 }
731
732 my $redirect;
733
734 if ($recurse) {
735 my $status = $hdr{Status};
736
737 # industry standard is to redirect POST as GET for
738 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
739 # also, the UA should ask the user for 301 and 307 and POST,
740 # industry standard seems to be to simply follow.
741 # we go with the industry standard.
742 if ($status == 301 or $status == 302 or $status == 303) {
743 # HTTP/1.1 is unclear on how to mutate the method
744 $method = "GET" unless $method eq "HEAD";
745 $redirect = 1;
746 } elsif ($status == 307) {
747 $redirect = 1;
727 } 748 }
728
729 my $redirect;
730
731 if ($recurse) {
732 my $status = $hdr{Status};
733
734 # industry standard is to redirect POST as GET for
735 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
736 # also, the UA should ask the user for 301 and 307 and POST,
737 # industry standard seems to be to simply follow.
738 # we go with the industry standard.
739 if ($status == 301 or $status == 302 or $status == 303) {
740 # HTTP/1.1 is unclear on how to mutate the method
741 $method = "GET" unless $method eq "HEAD";
742 $redirect = 1;
743 } elsif ($status == 307) {
744 $redirect = 1;
745 } 749 }
750
751 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
752 my $may_keep_alive = $_[3];
753
754 $state{handle}->destroy if $state{handle};
755 %state = ();
756
757 if (defined $_[1]) {
758 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
759 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
746 } 760 }
747 761
748 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
749 my $may_keep_alive = $_[3];
750
751 $state{handle}->destroy if $state{handle};
752 %state = ();
753
754 if (defined $_[1]) {
755 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
756 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
757 }
758
759 # set-cookie processing 762 # set-cookie processing
760 if ($arg{cookie_jar}) { 763 if ($arg{cookie_jar}) {
761 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date}; 764 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
762 } 765 }
763 766
764 if ($redirect && exists $hdr{location}) { 767 if ($redirect && exists $hdr{location}) {
765 # we ignore any errors, as it is very common to receive 768 # we ignore any errors, as it is very common to receive
766 # Content-Length != 0 but no actual body 769 # Content-Length != 0 but no actual body
767 # we also access %hdr, as $_[1] might be an erro 770 # we also access %hdr, as $_[1] might be an erro
768 http_request ( 771 http_request (
769 $method => $hdr{location}, 772 $method => $hdr{location},
770 %arg, 773 %arg,
771 recurse => $recurse - 1, 774 recurse => $recurse - 1,
772 Redirect => [$_[0], \%hdr], 775 Redirect => [$_[0], \%hdr],
773 $cb); 776 $cb);
777 } else {
778 $cb->($_[0], \%hdr);
779 }
780 };
781
782 $ae_error = 597; # body phase
783
784 my $len = $hdr{"content-length"};
785
786 # body handling, many different code paths
787 # - no body expected
788 # - want_body_handle
789 # - te chunked
790 # - 2x length known (with or without on_body)
791 # - 2x length not known (with or without on_body)
792 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
793 $finish->(undef, 598 => "Request cancelled by on_header");
794 } elsif (
795 $hdr{Status} =~ /^(?:1..|204|205|304)$/
796 or $method eq "HEAD"
797 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
798 ) {
799 # no body
800 $finish->("", undef, undef, 1);
801
802 } elsif (!$redirect && $arg{want_body_handle}) {
803 $_[0]->on_eof (undef);
804 $_[0]->on_error (undef);
805 $_[0]->on_read (undef);
806
807 $finish->(delete $state{handle});
808
809 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
810 my $cl = 0;
811 my $body = undef;
812 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
813
814 $state{read_chunk} = sub {
815 $_[1] =~ /^([0-9a-fA-F]+)/
816 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
817
818 my $len = hex $1;
819
820 if ($len) {
821 $cl += $len;
822
823 $_[0]->push_read (chunk => $len, sub {
824 $on_body->($_[1], \%hdr)
825 or return $finish->(undef, 598 => "Request cancelled by on_body");
826
827 $_[0]->push_read (line => sub {
828 length $_[1]
829 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
830 $_[0]->push_read (line => $state{read_chunk});
831 });
832 });
774 } else { 833 } else {
775 $cb->($_[0], \%hdr); 834 $hdr{"content-length"} ||= $cl;
835
836 $_[0]->push_read (line => $qr_nlnl, sub {
837 if (length $_[1]) {
838 for ("$_[1]") {
839 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
840
841 my $hdr = parse_hdr
842 or return $finish->(undef, $ae_error => "Garbled response trailers");
843
844 %hdr = (%hdr, %$hdr);
845 }
846 }
847
848 $finish->($body, undef, undef, 1);
849 });
776 } 850 }
777 }; 851 };
778 852
779 $ae_error = 597; # body phase
780
781 my $len = $hdr{"content-length"};
782
783 # body handling, many different code paths
784 # - no body expected
785 # - want_body_handle
786 # - te chunked
787 # - 2x length known (with or without on_body)
788 # - 2x length not known (with or without on_body)
789 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
790 $finish->(undef, 598 => "Request cancelled by on_header");
791 } elsif (
792 $hdr{Status} =~ /^(?:1..|204|205|304)$/
793 or $method eq "HEAD"
794 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
795 ) {
796 # no body
797 $finish->("", undef, undef, 1);
798
799 } elsif (!$redirect && $arg{want_body_handle}) {
800 $_[0]->on_eof (undef);
801 $_[0]->on_error (undef);
802 $_[0]->on_read (undef);
803
804 $finish->(delete $state{handle});
805
806 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
807 my $cl = 0;
808 my $body = undef;
809 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
810
811 $state{read_chunk} = sub {
812 $_[1] =~ /^([0-9a-fA-F]+)/
813 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
814
815 my $len = hex $1;
816
817 if ($len) {
818 $cl += $len;
819
820 $_[0]->push_read (chunk => $len, sub {
821 $on_body->($_[1], \%hdr)
822 or return $finish->(undef, 598 => "Request cancelled by on_body");
823
824 $_[0]->push_read (line => sub {
825 length $_[1]
826 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
827 $_[0]->push_read (line => $state{read_chunk});
828 });
829 });
830 } else {
831 $hdr{"content-length"} ||= $cl;
832
833 $_[0]->push_read (line => $qr_nlnl, sub {
834 if (length $_[1]) {
835 for ("$_[1]") {
836 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
837
838 my $hdr = parse_hdr
839 or return $finish->(undef, $ae_error => "Garbled response trailers");
840
841 %hdr = (%hdr, %$hdr);
842 }
843 }
844
845 $finish->($body, undef, undef, 1);
846 });
847 }
848 };
849
850 $_[0]->push_read (line => $state{read_chunk}); 853 $_[0]->push_read (line => $state{read_chunk});
851 854
852 } elsif ($arg{on_body}) { 855 } elsif ($arg{on_body}) {
853 if (defined $len) { 856 if (defined $len) {
854 $_[0]->on_read (sub { 857 $_[0]->on_read (sub {
855 $len -= length $_[0]{rbuf}; 858 $len -= length $_[0]{rbuf};
856 859
857 $arg{on_body}(delete $_[0]{rbuf}, \%hdr) 860 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
858 or return $finish->(undef, 598 => "Request cancelled by on_body"); 861 or return $finish->(undef, 598 => "Request cancelled by on_body");
859 862
860 $len > 0 863 $len > 0
861 or $finish->("", undef, undef, 1); 864 or $finish->("", undef, undef, 1);
862 });
863 } else {
864 $_[0]->on_eof (sub {
865 $finish->("");
866 });
867 $_[0]->on_read (sub {
868 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
869 or $finish->(undef, 598 => "Request cancelled by on_body");
870 });
871 } 865 });
872 } else { 866 } else {
873 $_[0]->on_eof (undef); 867 $_[0]->on_eof (sub {
874 868 $finish->("");
875 if (defined $len) { 869 });
876 $_[0]->on_read (sub { 870 $_[0]->on_read (sub {
877 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) 871 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
878 if $len <= length $_[0]{rbuf}; 872 or $finish->(undef, 598 => "Request cancelled by on_body");
879 });
880 } else {
881 $_[0]->on_error (sub {
882 ($! == Errno::EPIPE || !$!)
883 ? $finish->(delete $_[0]{rbuf})
884 : $finish->(undef, $ae_error => $_[2]);
885 });
886 $_[0]->on_read (sub { });
887 } 873 });
888 } 874 }
875 } else {
876 $_[0]->on_eof (undef);
877
878 if (defined $len) {
879 $_[0]->on_read (sub {
880 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
881 if $len <= length $_[0]{rbuf};
882 });
883 } else {
884 $_[0]->on_error (sub {
885 ($! == Errno::EPIPE || !$!)
886 ? $finish->(delete $_[0]{rbuf})
887 : $finish->(undef, $ae_error => $_[2]);
888 });
889 $_[0]->on_read (sub { });
890 }
891 }
892 };
893
894 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
895 };
896
897 my $connect_cb = sub {
898 $state{fh} = shift
899 or do {
900 my $err = "$!";
901 %state = ();
902 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
889 }; 903 };
890 904
891 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
892 };
893
894 my $connect_cb = sub {
895 $state{fh} = shift
896 or do {
897 my $err = "$!";
898 %state = ();
899 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
900 };
901
902 return unless delete $state{connect_guard}; 905 return unless delete $state{connect_guard};
903 906
904 # get handle 907 # get handle
905 $state{handle} = new AnyEvent::Handle 908 $state{handle} = new AnyEvent::Handle
906 fh => $state{fh}, 909 fh => $state{fh},
907 peername => $rhost, 910 peername => $rhost,
908 tls_ctx => $arg{tls_ctx}, 911 tls_ctx => $arg{tls_ctx},
909 # these need to be reconfigured on keepalive handles 912 # these need to be reconfigured on keepalive handles
910 timeout => $timeout, 913 timeout => $timeout,
911 on_error => sub { 914 on_error => sub {
912 %state = (); 915 %state = ();
913 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] }); 916 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
914 }, 917 },
915 on_eof => sub { 918 on_eof => sub {
916 %state = (); 919 %state = ();
917 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }); 920 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
918 }, 921 },
919 ; 922 ;
920 923
921 # limit the number of persistent connections 924 # limit the number of persistent connections
922 # keepalive not yet supported 925 # keepalive not yet supported
923# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 926# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
924# ++$KA_COUNT{$_[1]}; 927# ++$KA_COUNT{$_[1]};
925# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 928# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
926# --$KA_COUNT{$_[1]} 929# --$KA_COUNT{$_[1]}
927# }; 930# };
928# $hdr{connection} = "keep-alive"; 931# $hdr{connection} = "keep-alive";
929# } 932# }
930 933
931 $state{handle}->starttls ("connect") if $rscheme eq "https"; 934 $state{handle}->starttls ("connect") if $rscheme eq "https";
932 935
933 # now handle proxy-CONNECT method 936 # now handle proxy-CONNECT method
934 if ($proxy && $uscheme eq "https") { 937 if ($proxy && $uscheme eq "https") {
935 # oh dear, we have to wrap it into a connect request 938 # oh dear, we have to wrap it into a connect request
936 939
937 # maybe re-use $uauthority with patched port? 940 # maybe re-use $uauthority with patched port?
938 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012"); 941 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
939 $state{handle}->push_read (line => $qr_nlnl, sub { 942 $state{handle}->push_read (line => $qr_nlnl, sub {
940 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 943 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
941 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 944 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
942 945
943 if ($2 == 200) { 946 if ($2 == 200) {
944 $rpath = $upath; 947 $rpath = $upath;
945 $handle_actual_request->(); 948 $handle_actual_request->();
946 } else { 949 } else {
947 %state = (); 950 %state = ();
948 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 951 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
949 }
950 }); 952 }
953 });
951 } else { 954 } else {
952 $handle_actual_request->(); 955 $handle_actual_request->();
953 }
954 }; 956 }
957 };
958
959 _get_slot $uhost, sub {
960 $state{slot_guard} = shift;
961
962 return unless $state{connect_guard};
955 963
956 my $tcp_connect = $arg{tcp_connect} 964 my $tcp_connect = $arg{tcp_connect}
957 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 965 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
958 966
959 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 967 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines