ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.81 by root, Sun Jan 2 01:20:17 2011 UTC vs.
Revision 1.84 by root, Sun Jan 2 05:13:27 2011 UTC

416 416
417# extract cookies from jar 417# extract cookies from jar
418sub cookie_jar_extract($$$$) { 418sub cookie_jar_extract($$$$) {
419 my ($jar, $uscheme, $uhost, $upath) = @_; 419 my ($jar, $uscheme, $uhost, $upath) = @_;
420 420
421 $uhost = lc $uhost;
422
421 %$jar = () if $jar->{version} != 1; 423 %$jar = () if $jar->{version} != 1;
422 424
423 my @cookies; 425 my @cookies;
424 426
425 while (my ($chost, $paths) = each %$jar) { 427 while (my ($chost, $paths) = each %$jar) {
475 while ( 477 while (
476 m{ 478 m{
477 \G\s* 479 \G\s*
478 (?: 480 (?:
479 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) 481 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
480 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) 482 | ([^=;,[:space:]]+) (?: \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) )?
481 ) 483 )
482 }gcxsi 484 }gcxsi
483 ) { 485 ) {
484 my $name = $2; 486 my $name = $2;
485 my $value = $4; 487 my $value = $4;
486 488
487 unless (defined $name) { 489 if (defined $1) {
488 # expires 490 # expires
489 $name = "expires"; 491 $name = "expires";
490 $value = $1; 492 $value = $1;
491 } elsif (!defined $value) { 493 } elsif (defined $3) {
492 # quoted 494 # quoted
493 $value = $3; 495 $value = $3;
494 $value =~ s/\\(.)/$1/gs; 496 $value =~ s/\\(.)/$1/gs;
495 } 497 }
496 498
530 $cdom = $uhost; 532 $cdom = $uhost;
531 } 533 }
532 534
533 # store it 535 # store it
534 $jar->{version} = 1; 536 $jar->{version} = 1;
535 $jar->{$cdom}{$cpath}{$name} = \%kv; 537 $jar->{lc $cdom}{$cpath}{$name} = \%kv;
536 538
537 redo if /\G\s*,/gc; 539 redo if /\G\s*,/gc;
538 } 540 }
539} 541}
540 542
658 $state{slot_guard} = shift; 660 $state{slot_guard} = shift;
659 661
660 return unless $state{connect_guard}; 662 return unless $state{connect_guard};
661 663
662 my $ae_error = 595; # connecting 664 my $ae_error = 595; # connecting
665
666 # handle actual, non-tunneled, request
667 my $handle_actual_request = sub {
668 $ae_error = 596; # request phase
669
670 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
671
672 # send request
673 $state{handle}->push_write (
674 "$method $rpath HTTP/1.1\015\012"
675 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
676 . "\015\012"
677 . (delete $arg{body})
678 );
679
680 # return if error occured during push_write()
681 return unless %state;
682
683 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
684
685 # status line and headers
686 $state{read_response} = sub {
687 for ("$_[1]") {
688 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
689
690 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
691 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
692
693 # 100 Continue handling
694 # should not happen as we don't send expect: 100-continue,
695 # but we handle it just in case.
696 # since we send the request body regardless, if we get an error
697 # we are out of-sync, which we currently do NOT handle correctly.
698 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
699 if $2 eq 100;
700
701 push @pseudo,
702 HTTPVersion => $1,
703 Status => $2,
704 Reason => $3,
705 ;
706
707 my $hdr = parse_hdr
708 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
709
710 %hdr = (%$hdr, @pseudo);
711 }
712
713 # redirect handling
714 # microsoft and other shitheads don't give a shit for following standards,
715 # try to support some common forms of broken Location headers.
716 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
717 $hdr{location} =~ s/^\.\/+//;
718
719 my $url = "$rscheme://$uhost:$uport";
720
721 unless ($hdr{location} =~ s/^\///) {
722 $url .= $upath;
723 $url =~ s/\/[^\/]*$//;
724 }
725
726 $hdr{location} = "$url/$hdr{location}";
727 }
728
729 my $redirect;
730
731 if ($recurse) {
732 my $status = $hdr{Status};
733
734 # industry standard is to redirect POST as GET for
735 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
736 # also, the UA should ask the user for 301 and 307 and POST,
737 # industry standard seems to be to simply follow.
738 # we go with the industry standard.
739 if ($status == 301 or $status == 302 or $status == 303) {
740 # HTTP/1.1 is unclear on how to mutate the method
741 $method = "GET" unless $method eq "HEAD";
742 $redirect = 1;
743 } elsif ($status == 307) {
744 $redirect = 1;
745 }
746 }
747
748 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
749 my $may_keep_alive = $_[3];
750
751 $state{handle}->destroy if $state{handle};
752 %state = ();
753
754 if (defined $_[1]) {
755 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
756 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
757 }
758
759 # set-cookie processing
760 if ($arg{cookie_jar}) {
761 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
762 }
763
764 if ($redirect && exists $hdr{location}) {
765 # we ignore any errors, as it is very common to receive
766 # Content-Length != 0 but no actual body
767 # we also access %hdr, as $_[1] might be an erro
768 http_request (
769 $method => $hdr{location},
770 %arg,
771 recurse => $recurse - 1,
772 Redirect => [$_[0], \%hdr],
773 $cb);
774 } else {
775 $cb->($_[0], \%hdr);
776 }
777 };
778
779 $ae_error = 597; # body phase
780
781 my $len = $hdr{"content-length"};
782 warn "no content $redirect x<$len>$hdr{Status}\n";#d#
783
784 # body handling, many different code paths
785 # - no body expected
786 # - want_body_handle
787 # - te chunked
788 # - 2x length known (with or without on_body)
789 # - 2x length not known (with or without on_body)
790 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
791 $finish->(undef, 598 => "Request cancelled by on_header");
792 } elsif (
793 $hdr{Status} =~ /^(?:1..|204|205|304)$/
794 or $method eq "HEAD"
795 or (defined $len && $len == 0) # == 0, not !, because "0 " is true
796 ) {
797 # no body
798 $finish->("", undef, undef, 1);
799
800 } elsif (!$redirect && $arg{want_body_handle}) {
801 $_[0]->on_eof (undef);
802 $_[0]->on_error (undef);
803 $_[0]->on_read (undef);
804
805 $finish->(delete $state{handle});
806
807 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
808 my $cl = 0;
809 my $body = undef;
810 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
811
812 $state{read_chunk} = sub {
813 $_[1] =~ /^([0-9a-fA-F]+)/
814 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
815
816 my $len = hex $1;
817
818 if ($len) {
819 $cl += $len;
820
821 $_[0]->push_read (chunk => $len, sub {
822 $on_body->($_[1], \%hdr)
823 or return $finish->(undef, 598 => "Request cancelled by on_body");
824
825 $_[0]->push_read (line => sub {
826 length $_[1]
827 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
828 $_[0]->push_read (line => $state{read_chunk});
829 });
830 });
831 } else {
832 $hdr{"content-length"} ||= $cl;
833
834 $_[0]->push_read (line => $qr_nlnl, sub {
835 if (length $_[1]) {
836 for ("$_[1]") {
837 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
838
839 my $hdr = parse_hdr
840 or return $finish->(undef, $ae_error => "Garbled response trailers");
841
842 %hdr = (%hdr, %$hdr);
843 }
844 }
845
846 $finish->($body, undef, undef, 1);
847 });
848 }
849 };
850
851 $_[0]->push_read (line => $state{read_chunk});
852
853 } elsif ($arg{on_body}) {
854 if (defined $len) {
855 $_[0]->on_read (sub {
856 $len -= length $_[0]{rbuf};
857
858 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
859 or return $finish->(undef, 598 => "Request cancelled by on_body");
860
861 $len > 0
862 or $finish->("", undef, undef, 1);
863 });
864 } else {
865 $_[0]->on_eof (sub {
866 $finish->("");
867 });
868 $_[0]->on_read (sub {
869 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
870 or $finish->(undef, 598 => "Request cancelled by on_body");
871 });
872 }
873 } else {
874 $_[0]->on_eof (undef);
875
876 if (defined $len) {
877 $_[0]->on_read (sub {
878 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
879 if $len <= length $_[0]{rbuf};
880 });
881 } else {
882 $_[0]->on_error (sub {
883 ($! == Errno::EPIPE || !$!)
884 ? $finish->(delete $_[0]{rbuf})
885 : $finish->(undef, $ae_error => $_[2]);
886 });
887 $_[0]->on_read (sub { });
888 }
889 }
890 };
891
892 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
893 };
663 894
664 my $connect_cb = sub { 895 my $connect_cb = sub {
665 $state{fh} = shift 896 $state{fh} = shift
666 or do { 897 or do {
667 my $err = "$!"; 898 my $err = "$!";
698# $hdr{connection} = "keep-alive"; 929# $hdr{connection} = "keep-alive";
699# } 930# }
700 931
701 $state{handle}->starttls ("connect") if $rscheme eq "https"; 932 $state{handle}->starttls ("connect") if $rscheme eq "https";
702 933
703 # handle actual, non-tunneled, request
704 my $handle_actual_request = sub {
705 $ae_error = 596; # request phase
706
707 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
708
709 # send request
710 $state{handle}->push_write (
711 "$method $rpath HTTP/1.1\015\012"
712 . (join "", map "\u$_: $hdr{$_}\015\012", grep defined $hdr{$_}, keys %hdr)
713 . "\015\012"
714 . (delete $arg{body})
715 );
716
717 # return if error occured during push_write()
718 return unless %state;
719
720 %hdr = (); # reduce memory usage, save a kitten, also make it possible to re-use
721
722 # status line and headers
723 $state{read_response} = sub {
724 for ("$_[1]") {
725 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
726
727 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
728 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
729
730 # 100 Continue handling
731 # should not happen as we don't send expect: 100-continue,
732 # but we handle it just in case.
733 # since we send the request body regardless, if we get an error
734 # we are out of-sync, which we currently do NOT handle correctly.
735 return $state{handle}->push_read (line => $qr_nlnl, $state{read_response})
736 if $2 eq 100;
737
738 push @pseudo,
739 HTTPVersion => $1,
740 Status => $2,
741 Reason => $3,
742 ;
743
744 my $hdr = parse_hdr
745 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Garbled response headers" }));
746
747 %hdr = (%$hdr, @pseudo);
748 }
749
750 # redirect handling
751 # microsoft and other shitheads don't give a shit for following standards,
752 # try to support some common forms of broken Location headers.
753 if ($hdr{location} !~ /^(?: $ | [^:\/?\#]+ : )/x) {
754 $hdr{location} =~ s/^\.\/+//;
755
756 my $url = "$rscheme://$uhost:$uport";
757
758 unless ($hdr{location} =~ s/^\///) {
759 $url .= $upath;
760 $url =~ s/\/[^\/]*$//;
761 }
762
763 $hdr{location} = "$url/$hdr{location}";
764 }
765
766 my $redirect;
767
768 if ($recurse) {
769 my $status = $hdr{Status};
770
771 # industry standard is to redirect POST as GET for
772 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
773 # also, the UA should ask the user for 301 and 307 and POST,
774 # industry standard seems to be to simply follow.
775 # we go with the industry standard.
776 if ($status == 301 or $status == 302 or $status == 303) {
777 # HTTP/1.1 is unclear on how to mutate the method
778 $method = "GET" unless $method eq "HEAD";
779 $redirect = 1;
780 } elsif ($status == 307) {
781 $redirect = 1;
782 }
783 }
784
785 my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive])
786 my $may_keep_alive = $_[3];
787
788 $state{handle}->destroy if $state{handle};
789 %state = ();
790
791 if (defined $_[1]) {
792 $hdr{OrigStatus} = $hdr{Status}; $hdr{Status} = $_[1];
793 $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2];
794 }
795
796 # set-cookie processing
797 if ($arg{cookie_jar}) {
798 cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost, $hdr{date};
799 }
800
801 if ($redirect && exists $hdr{location}) {
802 # we ignore any errors, as it is very common to receive
803 # Content-Length != 0 but no actual body
804 # we also access %hdr, as $_[1] might be an erro
805 http_request (
806 $method => $hdr{location},
807 %arg,
808 recurse => $recurse - 1,
809 Redirect => [$_[0], \%hdr],
810 $cb);
811 } else {
812 $cb->($_[0], \%hdr);
813 }
814 };
815
816 $ae_error = 597; # body phase
817
818 my $len = $hdr{"content-length"};
819
820 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
821 $finish->(undef, 598 => "Request cancelled by on_header");
822 } elsif (
823 $hdr{Status} =~ /^(?:1..|204|205|304)$/
824 or $method eq "HEAD"
825 or (defined $len && !$len)
826 ) {
827 # no body
828 $finish->("", undef, undef, 1);
829 } else {
830 # body handling, many different code paths
831 # - no body expected
832 # - want_body_handle
833 # - te chunked
834 # - 2x length known (with or without on_body)
835 # - 2x length not known (with or without on_body)
836 if (!$redirect && $arg{want_body_handle}) {
837 $_[0]->on_eof (undef);
838 $_[0]->on_error (undef);
839 $_[0]->on_read (undef);
840
841 $finish->(delete $state{handle});
842
843 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
844 my $cl = 0;
845 my $body = undef;
846 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
847
848 $state{read_chunk} = sub {
849 $_[1] =~ /^([0-9a-fA-F]+)/
850 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
851
852 my $len = hex $1;
853
854 if ($len) {
855 $cl += $len;
856
857 $_[0]->push_read (chunk => $len, sub {
858 $on_body->($_[1], \%hdr)
859 or return $finish->(undef, 598 => "Request cancelled by on_body");
860
861 $_[0]->push_read (line => sub {
862 length $_[1]
863 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
864 $_[0]->push_read (line => $state{read_chunk});
865 });
866 });
867 } else {
868 $hdr{"content-length"} ||= $cl;
869
870 $_[0]->push_read (line => $qr_nlnl, sub {
871 if (length $_[1]) {
872 for ("$_[1]") {
873 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
874
875 my $hdr = parse_hdr
876 or return $finish->(undef, $ae_error => "Garbled response trailers");
877
878 %hdr = (%hdr, %$hdr);
879 }
880 }
881
882 $finish->($body, undef, undef, 1);
883 });
884 }
885 };
886
887 $_[0]->push_read (line => $state{read_chunk});
888
889 } elsif ($arg{on_body}) {
890 if ($len) {
891 $_[0]->on_read (sub {
892 $len -= length $_[0]{rbuf};
893
894 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
895 or return $finish->(undef, 598 => "Request cancelled by on_body");
896
897 $len > 0
898 or $finish->("", undef, undef, 1);
899 });
900 } else {
901 $_[0]->on_eof (sub {
902 $finish->("");
903 });
904 $_[0]->on_read (sub {
905 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
906 or $finish->(undef, 598 => "Request cancelled by on_body");
907 });
908 }
909 } else {
910 $_[0]->on_eof (undef);
911
912 if ($len) {
913 $_[0]->on_read (sub {
914 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
915 if $len <= length $_[0]{rbuf};
916 });
917 } else {
918 $_[0]->on_error (sub {
919 ($! == Errno::EPIPE || !$!)
920 ? $finish->(delete $_[0]{rbuf})
921 : $finish->(undef, $ae_error => $_[2]);
922 });
923 $_[0]->on_read (sub { });
924 }
925 }
926 }
927 };
928
929 $state{handle}->push_read (line => $qr_nlnl, $state{read_response});
930 };
931
932 # now handle proxy-CONNECT method 934 # now handle proxy-CONNECT method
933 if ($proxy && $uscheme eq "https") { 935 if ($proxy && $uscheme eq "https") {
934 # oh dear, we have to wrap it into a connect request 936 # oh dear, we have to wrap it into a connect request
935 937
936 # maybe re-use $uauthority with patched port? 938 # maybe re-use $uauthority with patched port?
954 956
955 my $tcp_connect = $arg{tcp_connect} 957 my $tcp_connect = $arg{tcp_connect}
956 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect }; 958 || do { require AnyEvent::Socket; \&AnyEvent::Socket::tcp_connect };
957 959
958 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout }); 960 $state{connect_guard} = $tcp_connect->($rhost, $rport, $connect_cb, $arg{on_prepare} || sub { $timeout });
959
960 }; 961 };
961 962
962 defined wantarray && AnyEvent::Util::guard { %state = () } 963 defined wantarray && AnyEvent::Util::guard { %state = () }
963} 964}
964 965

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines