ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.87 by root, Sun Jan 2 08:51:53 2011 UTC vs.
Revision 1.88 by root, Sun Jan 2 20:57:03 2011 UTC

57our $MAX_PERSISTENT = 8; 57our $MAX_PERSISTENT = 8;
58our $PERSISTENT_TIMEOUT = 2; 58our $PERSISTENT_TIMEOUT = 2;
59our $TIMEOUT = 300; 59our $TIMEOUT = 300;
60 60
61# changing these is evil 61# changing these is evil
62our $MAX_PERSISTENT_PER_HOST = 0; 62our $MAX_PERSISTENT_PER_HOST = 2;
63our $MAX_PER_HOST = 4; 63our $MAX_PER_HOST = 4;
64 64
65our $PROXY; 65our $PROXY;
66our $ACTIVE = 0; 66our $ACTIVE = 0;
67 67
654 $hdr{connection} = "close Te"; #1.1 654 $hdr{connection} = "close Te"; #1.1
655 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 655 $hdr{te} = "trailers" unless exists $hdr{te}; #1.1
656 656
657 my %state = (connect_guard => 1); 657 my %state = (connect_guard => 1);
658 658
659 _get_slot $uhost, sub {
660 $state{slot_guard} = shift;
661
662 return unless $state{connect_guard};
663
664 my $ae_error = 595; # connecting 659 my $ae_error = 595; # connecting
665 660
666 # handle actual, non-tunneled, request 661 # handle actual, non-tunneled, request
667 my $handle_actual_request = sub { 662 my $handle_actual_request = sub {
668 $ae_error = 596; # request phase 663 $ae_error = 596; # request phase
669 664
670 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 665 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
671 666
672 # send request 667 # send request
673 $state{handle}->push_write ( 668 $state{handle}->push_write (
674 "$method $rpath HTTP/1.1\015\012" 669 "$method $rpath HTTP/1.1\015\012"
675 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr) 670 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
676 . "\015\012" 671 . "\015\012"
677 . (delete $arg{body}) 672 . (delete $arg{body})
678 ); 673 );
679 674
680 # return if error occured during push_write() 675 # return if error occured during push_write()
681 return unless %state; 676 return unless %state;
682 677
683 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use 678 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
684 679
685 # status line and headers 680 # status line and headers
686 $state{read_response} = sub { 681 $state{read_response} = sub {
687 for ("$_[1]") { 682 for ("$_[1]") {
688 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 683 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
689 684
690 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci 685 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
691 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); 686 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
692 687
693 # 100 Continue handling 688 # 100 Continue handling
694 # should not happen as we don't send expect: 100-continue, 689 # should not happen as we don't send expect: 100-continue,
695 # but we handle it just in case. 690 # but we handle it just in case.
696 # since we send the request body regardless, if we get an error 691 # since we send the request body regardless, if we get an error
697 # we are out of-sync, which we currently do NOT handle correctly. 692 # we are out of-sync, which we currently do NOT handle correctly.
698 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response}) 693 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
699 if $2 eq 100; 694 if $2 eq 100;
700 695
701 push @pseudo, 696 push @pseudo,
702 HTTPVersion => $1, 697 HTTPVersion => $1,
703 Status => $2, 698 Status => $2,
704 Reason => $3, 699 Reason => $3,
705 ; 700 ;
706 701
707 my $hdr = parse_hdr 702 my $hdr = parse_hdr
708 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" })); 703 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
709 704
710 %hdr = (%$hdr, @pseudo); 705 %hdr = (%$hdr, @pseudo);
706 }
707
708 # redirect handling
709 # microsoft and other shitheads don't give a shit for following standards,
710 # try to support some common forms of broken Location headers.
711 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
712 $hdr{location} =~ s/^\.\/+//;
713
714 my $url = "$rscheme://$uhost:$uport";
715
716 unless ($hdr{location} =~ s/^\///) {
717 $url .= $upath;
718 $url =~ s/\/[^\/]*$//;
711 } 719 }
712 720
713 # redirect handling
714 # microsoft and other shitheads don't give a shit for following standards,
715 # try to support some common forms of broken Location headers.
716 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
717 $hdr{location} =~ s/^\.\/+//;
718
719 my $url = "$rscheme://$uhost:$uport";
720
721 unless ($hdr{location} =~ s/^\///) {
722 $url .= $upath;
723 $url =~ s/\/[^\/]*$//;
724 }
725
726 $hdr{location} = "$url/$hdr{location}"; 721 $hdr{location} = "$url/$hdr{location}";
722 }
723
724 my $redirect;
725
726 if ($recurse) {
727 my $status = $hdr{Status};
728
729 # industry standard is to redirect POST as GET for
730 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
731 # also, the UA should ask the user for 301 and 307 and POST,
732 # industry standard seems to be to simply follow.
733 # we go with the industry standard.
734 if ($status == 301 or $status == 302 or $status == 303) {
735 # HTTP/1.1 is unclear on how to mutate the method
736 $method = "GET" unless $method eq "HEAD";
737 $redirect = 1;
738 } elsif ($status == 307) {
739 $redirect = 1;
727 } 740 }
728
729 my $redirect;
730
731 if ($recurse) {
732 my $status = $hdr{Status};
733
734 # industry standard is to redirect POST as GET for
735 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
736 # also, the UA should ask the user for 301 and 307 and POST,
737 # industry standard seems to be to simply follow.
738 # we go with the industry standard.
739 if ($status == 301 or $status == 302 or $status == 303) {
740 # HTTP/1.1 is unclear on how to mutate the method
741 $method = "GET" unless $method eq "HEAD";
742 $redirect = 1;
743 } elsif ($status == 307) {
744 $redirect = 1;
745 } 741 }
742
743 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
744 my $may_keep_alive = $_[3];
745
746 $state{handle}->destroy if $state{handle};
747 %state = ();
748
749 if (defined $_[1]) {
750 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
751 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
746 } 752 }
747 753
748 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
749 my $may_keep_alive = $_[3];
750
751 $state{handle}->destroy if $state{handle};
752 %state = ();
753
754 if (defined $_[1]) {
755 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
756 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
757 }
758
759 # set-cookie processing 754 # set-cookie processing
760 if ($arg{cookie_jar}) { 755 if ($arg{cookie_jar}) {
761 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date}; 756 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
762 } 757 }
763 758
764 if ($redirect && exists $hdr{location}) { 759 if ($redirect && exists $hdr{location}) {
765 # we ignore any errors, as it is very common to receive 760 # we ignore any errors, as it is very common to receive
766 # Content-Length != 0 but no actual body 761 # Content-Length != 0 but no actual body
767 # we also access %hdr, as $_[1] might be an erro 762 # we also access %hdr, as $_[1] might be an erro
768 http_request ( 763 http_request (
769 $method => $hdr{location}, 764 $method => $hdr{location},
770 %arg, 765 %arg,
771 recurse => $recurse - 1, 766 recurse => $recurse - 1,
772 Redirect => [$_[0], \%hdr], 767 Redirect => [$_[0], \%hdr],
773 $cb); 768 $cb);
769 } else {
770 $cb->($_[0], \%hdr);
771 }
772 };
773
774 $ae_error = 597; # body phase
775
776 my $len = $hdr{"content-length"};
777
778 # body handling, many different code paths
779 # - no body expected
780 # - want_body_handle
781 # - te chunked
782 # - 2x length known (with or without on_body)
783 # - 2x length not known (with or without on_body)
784 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
785 $finish->(undef, 598 => "Request cancelled by on_header");
786 } elsif (
787 $hdr{Status} =~ /^(?:1..|204|205|304)$/
788 or $method eq "HEAD"
789 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
790 ) {
791 # no body
792 $finish->("", undef, undef, 1);
793
794 } elsif (!$redirect && $arg{want_body_handle}) {
795 $_[0]->on_eof (undef);
796 $_[0]->on_error (undef);
797 $_[0]->on_read (undef);
798
799 $finish->(delete $state{handle});
800
801 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
802 my $cl = 0;
803 my $body = undef;
804 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
805
806 $state{read_chunk} = sub {
807 $_[1] =~ /^([0-9a-fA-F]+)/
808 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
809
810 my $len = hex $1;
811
812 if ($len) {
813 $cl += $len;
814
815 $_[0]->push_read (chunk => $len, sub {
816 $on_body->($_[1], \%hdr)
817 or return $finish->(undef, 598 => "Request cancelled by on_body");
818
819 $_[0]->push_read (line => sub {
820 length $_[1]
821 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
822 $_[0]->push_read (line => $state{read_chunk});
823 });
824 });
774 } else { 825 } else {
775 $cb->($_[0], \%hdr); 826 $hdr{"content-length"} ||= $cl;
827
828 $_[0]->push_read (line => $qr_nlnl, sub {
829 if (length $_[1]) {
830 for ("$_[1]") {
831 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
832
833 my $hdr = parse_hdr
834 or return $finish->(undef, $ae_error => "Garbled response trailers");
835
836 %hdr = (%hdr, %$hdr);
837 }
838 }
839
840 $finish->($body, undef, undef, 1);
841 });
776 } 842 }
777 }; 843 };
778 844
779 $ae_error = 597; # body phase
780
781 my $len = $hdr{"content-length"};
782
783 # body handling, many different code paths
784 # - no body expected
785 # - want_body_handle
786 # - te chunked
787 # - 2x length known (with or without on_body)
788 # - 2x length not known (with or without on_body)
789 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
790 $finish->(undef, 598 => "Request cancelled by on_header");
791 } elsif (
792 $hdr{Status} =~ /^(?:1..|204|205|304)$/
793 or $method eq "HEAD"
794 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
795 ) {
796 # no body
797 $finish->("", undef, undef, 1);
798
799 } elsif (!$redirect && $arg{want_body_handle}) {
800 $_[0]->on_eof (undef);
801 $_[0]->on_error (undef);
802 $_[0]->on_read (undef);
803
804 $finish->(delete $state{handle});
805
806 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
807 my $cl = 0;
808 my $body = undef;
809 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
810
811 $state{read_chunk} = sub {
812 $_[1] =~ /^([0-9a-fA-F]+)/
813 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
814
815 my $len = hex $1;
816
817 if ($len) {
818 $cl += $len;
819
820 $_[0]->push_read (chunk => $len, sub {
821 $on_body->($_[1], \%hdr)
822 or return $finish->(undef, 598 => "Request cancelled by on_body");
823
824 $_[0]->push_read (line => sub {
825 length $_[1]
826 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
827 $_[0]->push_read (line => $state{read_chunk});
828 });
829 });
830 } else {
831 $hdr{"content-length"} ||= $cl;
832
833 $_[0]->push_read (line => $qr_nlnl, sub {
834 if (length $_[1]) {
835 for ("$_[1]") {
836 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
837
838 my $hdr = parse_hdr
839 or return $finish->(undef, $ae_error => "Garbled response trailers");
840
841 %hdr = (%hdr, %$hdr);
842 }
843 }
844
845 $finish->($body, undef, undef, 1);
846 });
847 }
848 };
849
850 $_[0]->push_read (line => $state{read_chunk}); 845 $_[0]->push_read (line => $state{read_chunk});
851 846
852 } elsif ($arg{on_body}) { 847 } elsif ($arg{on_body}) {
853 if (defined $len) { 848 if (defined $len) {
854 $_[0]->on_read (sub { 849 $_[0]->on_read (sub {
855 $len -= length $_[0]{rbuf}; 850 $len -= length $_[0]{rbuf};
856 851
857 $arg{on_body}(delete $_[0]{rbuf}, \%hdr) 852 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
858 or return $finish->(undef, 598 => "Request cancelled by on_body"); 853 or return $finish->(undef, 598 => "Request cancelled by on_body");
859 854
860 $len > 0 855 $len > 0
861 or $finish->("", undef, undef, 1); 856 or $finish->("", undef, undef, 1);
862 });
863 } else {
864 $_[0]->on_eof (sub {
865 $finish->("");
866 });
867 $_[0]->on_read (sub {
868 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
869 or $finish->(undef, 598 => "Request cancelled by on_body");
870 });
871 } 857 });
872 } else { 858 } else {
873 $_[0]->on_eof (undef); 859 $_[0]->on_eof (sub {
874 860 $finish->("");
875 if (defined $len) { 861 });
876 $_[0]->on_read (sub { 862 $_[0]->on_read (sub {
877 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) 863 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
878 if $len <= length $_[0]{rbuf}; 864 or $finish->(undef, 598 => "Request cancelled by on_body");
879 });
880 } else {
881 $_[0]->on_error (sub {
882 ($! == Errno::EPIPE || !$!)
883 ? $finish->(delete $_[0]{rbuf})
884 : $finish->(undef, $ae_error => $_[2]);
885 });
886 $_[0]->on_read (sub { });
887 } 865 });
888 } 866 }
867 } else {
868 $_[0]->on_eof (undef);
869
870 if (defined $len) {
871 $_[0]->on_read (sub {
872 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
873 if $len <= length $_[0]{rbuf};
874 });
875 } else {
876 $_[0]->on_error (sub {
877 ($! == Errno::EPIPE || !$!)
878 ? $finish->(delete $_[0]{rbuf})
879 : $finish->(undef, $ae_error => $_[2]);
880 });
881 $_[0]->on_read (sub { });
882 }
883 }
884 };
885
886 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
887 };
888
889 my $connect_cb = sub {
890 $state{fh} = shift
891 or do {
892 my $err = "$!";
893 %state = ();
894 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
889 }; 895 };
890 896
891 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
892 };
893
894 my $connect_cb = sub {
895 $state{fh} = shift
896 or do {
897 my $err = "$!";
898 %state = ();
899 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
900 };
901
902 return unless delete $state{connect_guard}; 897 return unless delete $state{connect_guard};
903 898
904 # get handle 899 # get handle
905 $state{handle} = new AnyEvent::Handle 900 $state{handle} = new AnyEvent::Handle
906 fh => $state{fh}, 901 fh => $state{fh},
907 peername => $rhost, 902 peername => $rhost,
908 tls_ctx => $arg{tls_ctx}, 903 tls_ctx => $arg{tls_ctx},
909 # these need to be reconfigured on keepalive handles 904 # these need to be reconfigured on keepalive handles
910 timeout => $timeout, 905 timeout => $timeout,
911 on_error => sub { 906 on_error => sub {
912 %state = (); 907 %state = ();
913 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] }); 908 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
914 }, 909 },
915 on_eof => sub { 910 on_eof => sub {
916 %state = (); 911 %state = ();
917 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }); 912 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
918 }, 913 },
919 ; 914 ;
920 915
921 # limit the number of persistent connections 916 # limit the number of persistent connections
922 # keepalive not yet supported 917 # keepalive not yet supported
923# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) { 918# if ($KA_COUNT{$_[1]} < $MAX_PERSISTENT_PER_HOST) {
924# ++$KA_COUNT{$_[1]}; 919# ++$KA_COUNT{$_[1]};
925# $state{handle}{ka_count_guard} = AnyEvent::Util::guard { 920# $state{handle}{ka_count_guard} = AnyEvent::Util::guard {
926# --$KA_COUNT{$_[1]} 921# --$KA_COUNT{$_[1]}
927# }; 922# };
928# $hdr{connection} = "keep-alive"; 923# $hdr{connection} = "keep-alive";
929# } 924# }
930 925
931 $state{handle}->starttls ("connect") if $rscheme eq "https"; 926 $state{handle}->starttls ("connect") if $rscheme eq "https";
932 927
933 # now handle proxy-CONNECT method 928 # now handle proxy-CONNECT method
934 if ($proxy && $uscheme eq "https") { 929 if ($proxy && $uscheme eq "https") {
935 # oh dear, we have to wrap it into a connect request 930 # oh dear, we have to wrap it into a connect request
936 931
937 # maybe re-use $uauthority with patched port? 932 # maybe re-use $uauthority with patched port?
938 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012"); 933 $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012");
939 $state{handle}->push_read (line => $qr_nlnl, sub { 934 $state{handle}->push_read (line => $qr_nlnl, sub {
940 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix 935 $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix
941 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); 936 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" }));
942 937
943 if ($2 == 200) { 938 if ($2 == 200) {
944 $rpath = $upath; 939 $rpath = $upath;
945 $handle_actual_request->(); 940 $handle_actual_request->();
946 } else { 941 } else {
947 %state = (); 942 %state = ();
948 $cb->(undef, { @pseudo, Status => $2, Reason => $3 }); 943 $cb->(undef, { @pseudo, Status => $2, Reason => $3 });
949 }
950 }); 944 }
945 });
951 } else { 946 } else {
952 $handle_actual_request->(); 947 $handle_actual_request->();
953 }
954 }; 948 }
949 };
950
951 _get_slot $uhost, sub {
952 $state{slot_guard} = shift;
953
954 return unless $state{connect_guard};
955 955
956 my $tcp_connect = $arg{tcp_connect} 956 my $tcp_connect = $arg{tcp_connect}
957 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 957 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
958 958
959 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 959 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines