… | |
… | |
36 | |
36 | |
37 | =cut |
37 | =cut |
38 | |
38 | |
39 | package AnyEvent::HTTP; |
39 | package AnyEvent::HTTP; |
40 | |
40 | |
41 | use strict; |
41 | use common::sense; |
42 | no warnings; |
|
|
43 | |
42 | |
44 | use Errno (); |
43 | use Errno (); |
45 | |
44 | |
46 | use AnyEvent 5.0 (); |
45 | use AnyEvent 5.0 (); |
47 | use AnyEvent::Util (); |
46 | use AnyEvent::Util (); |
… | |
… | |
415 | } |
414 | } |
416 | |
415 | |
417 | # extract cookies from jar |
416 | # extract cookies from jar |
418 | sub cookie_jar_extract($$$$) { |
417 | sub cookie_jar_extract($$$$) { |
419 | my ($jar, $uscheme, $uhost, $upath) = @_; |
418 | my ($jar, $uscheme, $uhost, $upath) = @_; |
420 | |
|
|
421 | $uhost = lc $uhost; |
|
|
422 | |
419 | |
423 | %$jar = () if $jar->{version} != 1; |
420 | %$jar = () if $jar->{version} != 1; |
424 | |
421 | |
425 | my @cookies; |
422 | my @cookies; |
426 | |
423 | |
… | |
… | |
609 | : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); |
606 | : return $cb->(undef, { @pseudo, Status => 599, Reason => "Only http and https URL schemes supported" }); |
610 | |
607 | |
611 | $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x |
608 | $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x |
612 | or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); |
609 | or return $cb->(undef, { @pseudo, Status => 599, Reason => "Unparsable URL" }); |
613 | |
610 | |
614 | my $uhost = $1; |
611 | my $uhost = lc $1; |
615 | $uport = $2 if defined $2; |
612 | $uport = $2 if defined $2; |
616 | |
613 | |
617 | $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" |
614 | $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" |
618 | unless exists $hdr{host}; |
615 | unless exists $hdr{host}; |
619 | |
616 | |
… | |
… | |
638 | $rscheme = "http" unless defined $rscheme; |
635 | $rscheme = "http" unless defined $rscheme; |
639 | |
636 | |
640 | # don't support https requests over https-proxy transport, |
637 | # don't support https requests over https-proxy transport, |
641 | # can't be done with tls as spec'ed, unless you double-encrypt. |
638 | # can't be done with tls as spec'ed, unless you double-encrypt. |
642 | $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; |
639 | $rscheme = "http" if $uscheme eq "https" && $rscheme eq "https"; |
|
|
640 | |
|
|
641 | $rhost = lc $rhost; |
|
|
642 | $rscheme = lc $rscheme; |
643 | } else { |
643 | } else { |
644 | ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); |
644 | ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); |
645 | } |
645 | } |
646 | |
646 | |
647 | # leave out fragment and query string, just a heuristic |
647 | # leave out fragment and query string, just a heuristic |
… | |
… | |
649 | $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; |
649 | $hdr{"user-agent"} = $USERAGENT unless exists $hdr{"user-agent"}; |
650 | |
650 | |
651 | $hdr{"content-length"} = length $arg{body} |
651 | $hdr{"content-length"} = length $arg{body} |
652 | if length $arg{body} || $method ne "GET"; |
652 | if length $arg{body} || $method ne "GET"; |
653 | |
653 | |
654 | $hdr{connection} = "close TE"; #1.1 |
654 | $hdr{connection} = "close Te"; #1.1 |
655 | $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 |
655 | $hdr{te} = "trailers" unless exists $hdr{te}; #1.1 |
656 | |
656 | |
657 | my %state = (connect_guard => 1); |
657 | my %state = (connect_guard => 1); |
658 | |
658 | |
659 | _get_slot $uhost, sub { |
659 | _get_slot $uhost, sub { |
… | |
… | |
778 | |
778 | |
779 | $ae_error = 597; # body phase |
779 | $ae_error = 597; # body phase |
780 | |
780 | |
781 | my $len = $hdr{"content-length"}; |
781 | my $len = $hdr{"content-length"}; |
782 | |
782 | |
|
|
783 | # body handling, many different code paths |
|
|
784 | # - no body expected |
|
|
785 | # - want_body_handle |
|
|
786 | # - te chunked |
|
|
787 | # - 2x length known (with or without on_body) |
|
|
788 | # - 2x length not known (with or without on_body) |
783 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
789 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
784 | $finish->(undef, 598 => "Request cancelled by on_header"); |
790 | $finish->(undef, 598 => "Request cancelled by on_header"); |
785 | } elsif ( |
791 | } elsif ( |
786 | $hdr{Status} =~ /^(?:1..|204|205|304)$/ |
792 | $hdr{Status} =~ /^(?:1..|204|205|304)$/ |
787 | or $method eq "HEAD" |
793 | or $method eq "HEAD" |
788 | or (defined $len && !$len) |
794 | or (defined $len && $len == 0) # == 0, not !, because "0 " is true |
789 | ) { |
795 | ) { |
790 | # no body |
796 | # no body |
791 | $finish->("", undef, undef, 1); |
797 | $finish->("", undef, undef, 1); |
792 | } else { |
798 | |
793 | # body handling, many different code paths |
|
|
794 | # - no body expected |
|
|
795 | # - want_body_handle |
|
|
796 | # - te chunked |
|
|
797 | # - 2x length known (with or without on_body) |
|
|
798 | # - 2x length not known (with or without on_body) |
|
|
799 | if (!$redirect && $arg{want_body_handle}) { |
799 | } elsif (!$redirect && $arg{want_body_handle}) { |
800 | $_[0]->on_eof (undef); |
800 | $_[0]->on_eof (undef); |
801 | $_[0]->on_error (undef); |
801 | $_[0]->on_error (undef); |
802 | $_[0]->on_read (undef); |
802 | $_[0]->on_read (undef); |
803 | |
803 | |
804 | $finish->(delete $state{handle}); |
804 | $finish->(delete $state{handle}); |
805 | |
805 | |
806 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
806 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
807 | my $cl = 0; |
807 | my $cl = 0; |
808 | my $body = undef; |
808 | my $body = undef; |
809 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
809 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
810 | |
810 | |
811 | $state{read_chunk} = sub { |
811 | $state{read_chunk} = sub { |
812 | $_[1] =~ /^([0-9a-fA-F]+)/ |
812 | $_[1] =~ /^([0-9a-fA-F]+)/ |
813 | or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
813 | or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
814 | |
814 | |
815 | my $len = hex $1; |
815 | my $len = hex $1; |
816 | |
816 | |
817 | if ($len) { |
817 | if ($len) { |
818 | $cl += $len; |
818 | $cl += $len; |
819 | |
819 | |
820 | $_[0]->push_read (chunk => $len, sub { |
820 | $_[0]->push_read (chunk => $len, sub { |
821 | $on_body->($_[1], \%hdr) |
821 | $on_body->($_[1], \%hdr) |
822 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
822 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
823 | |
823 | |
824 | $_[0]->push_read (line => sub { |
824 | $_[0]->push_read (line => sub { |
825 | length $_[1] |
825 | length $_[1] |
826 | and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
826 | and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
827 | $_[0]->push_read (line => $state{read_chunk}); |
827 | $_[0]->push_read (line => $state{read_chunk}); |
828 | }); |
|
|
829 | }); |
828 | }); |
830 | } else { |
|
|
831 | $hdr{"content-length"} ||= $cl; |
|
|
832 | |
|
|
833 | $_[0]->push_read (line => $qr_nlnl, sub { |
|
|
834 | if (length $_[1]) { |
|
|
835 | for ("$_[1]") { |
|
|
836 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
|
|
837 | |
|
|
838 | my $hdr = parse_hdr |
|
|
839 | or return $finish->(undef, $ae_error => "Garbled response trailers"); |
|
|
840 | |
|
|
841 | %hdr = (%hdr, %$hdr); |
|
|
842 | } |
|
|
843 | } |
|
|
844 | |
|
|
845 | $finish->($body, undef, undef, 1); |
|
|
846 | }); |
|
|
847 | } |
|
|
848 | }; |
|
|
849 | |
|
|
850 | $_[0]->push_read (line => $state{read_chunk}); |
|
|
851 | |
|
|
852 | } elsif ($arg{on_body}) { |
|
|
853 | if ($len) { |
|
|
854 | $_[0]->on_read (sub { |
|
|
855 | $len -= length $_[0]{rbuf}; |
|
|
856 | |
|
|
857 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
|
|
858 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
|
|
859 | |
|
|
860 | $len > 0 |
|
|
861 | or $finish->("", undef, undef, 1); |
|
|
862 | }); |
829 | }); |
863 | } else { |
830 | } else { |
864 | $_[0]->on_eof (sub { |
831 | $hdr{"content-length"} ||= $cl; |
865 | $finish->(""); |
832 | |
|
|
833 | $_[0]->push_read (line => $qr_nlnl, sub { |
|
|
834 | if (length $_[1]) { |
|
|
835 | for ("$_[1]") { |
|
|
836 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
|
|
837 | |
|
|
838 | my $hdr = parse_hdr |
|
|
839 | or return $finish->(undef, $ae_error => "Garbled response trailers"); |
|
|
840 | |
|
|
841 | %hdr = (%hdr, %$hdr); |
|
|
842 | } |
866 | }); |
843 | } |
867 | $_[0]->on_read (sub { |
844 | |
868 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
845 | $finish->($body, undef, undef, 1); |
869 | or $finish->(undef, 598 => "Request cancelled by on_body"); |
|
|
870 | }); |
846 | }); |
871 | } |
847 | } |
|
|
848 | }; |
|
|
849 | |
|
|
850 | $_[0]->push_read (line => $state{read_chunk}); |
|
|
851 | |
|
|
852 | } elsif ($arg{on_body}) { |
|
|
853 | if (defined $len) { |
|
|
854 | $_[0]->on_read (sub { |
|
|
855 | $len -= length $_[0]{rbuf}; |
|
|
856 | |
|
|
857 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
|
|
858 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
|
|
859 | |
|
|
860 | $len > 0 |
|
|
861 | or $finish->("", undef, undef, 1); |
|
|
862 | }); |
872 | } else { |
863 | } else { |
873 | $_[0]->on_eof (undef); |
864 | $_[0]->on_eof (sub { |
874 | |
865 | $finish->(""); |
875 | if ($len) { |
866 | }); |
876 | $_[0]->on_read (sub { |
867 | $_[0]->on_read (sub { |
|
|
868 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
|
|
869 | or $finish->(undef, 598 => "Request cancelled by on_body"); |
|
|
870 | }); |
|
|
871 | } |
|
|
872 | } else { |
|
|
873 | $_[0]->on_eof (undef); |
|
|
874 | |
|
|
875 | if (defined $len) { |
|
|
876 | $_[0]->on_read (sub { |
877 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
877 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
878 | if $len <= length $_[0]{rbuf}; |
878 | if $len <= length $_[0]{rbuf}; |
879 | }); |
879 | }); |
880 | } else { |
880 | } else { |
881 | $_[0]->on_error (sub { |
881 | $_[0]->on_error (sub { |
882 | ($! == Errno::EPIPE || !$!) |
882 | ($! == Errno::EPIPE || !$!) |
883 | ? $finish->(delete $_[0]{rbuf}) |
883 | ? $finish->(delete $_[0]{rbuf}) |
884 | : $finish->(undef, $ae_error => $_[2]); |
884 | : $finish->(undef, $ae_error => $_[2]); |
885 | }); |
885 | }); |
886 | $_[0]->on_read (sub { }); |
886 | $_[0]->on_read (sub { }); |
887 | } |
|
|
888 | } |
887 | } |
889 | } |
888 | } |
890 | }; |
889 | }; |
891 | |
890 | |
892 | $state{handle}->push_read (line => $qr_nlnl, $state{read_response}); |
891 | $state{handle}->push_read (line => $qr_nlnl, $state{read_response}); |
… | |
… | |
934 | # now handle proxy-CONNECT method |
933 | # now handle proxy-CONNECT method |
935 | if ($proxy && $uscheme eq "https") { |
934 | if ($proxy && $uscheme eq "https") { |
936 | # oh dear, we have to wrap it into a connect request |
935 | # oh dear, we have to wrap it into a connect request |
937 | |
936 | |
938 | # maybe re-use $uauthority with patched port? |
937 | # maybe re-use $uauthority with patched port? |
939 | $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012Host: $uhost\015\012\015\012"); |
938 | $state{handle}->push_write ("CONNECT $uhost:$uport HTTP/1.0\015\012\015\012"); |
940 | $state{handle}->push_read (line => $qr_nlnl, sub { |
939 | $state{handle}->push_read (line => $qr_nlnl, sub { |
941 | $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix |
940 | $_[1] =~ /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\015\012]*) )?/ix |
942 | or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); |
941 | or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid proxy connect response ($_[1])" })); |
943 | |
942 | |
944 | if ($2 == 200) { |
943 | if ($2 == 200) { |