… | |
… | |
122 | |
122 | |
123 | If the server sends a header multiple times, then their contents will be |
123 | If the server sends a header multiple times, then their contents will be |
124 | joined together with a comma (C<,>), as per the HTTP spec. |
124 | joined together with a comma (C<,>), as per the HTTP spec. |
125 | |
125 | |
126 | If an internal error occurs, such as not being able to resolve a hostname, |
126 | If an internal error occurs, such as not being able to resolve a hostname, |
127 | then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> |
127 | then C<$data> will be C<undef>, C<< $headers->{Status} >> will be |
128 | (usually C<599>) and the C<Reason> pseudo-header will contain an error |
128 | C<590>-C<599> and the C<Reason> pseudo-header will contain an error |
129 | message. |
129 | message. Currently the following status codes are used: |
|
|
130 | |
|
|
131 | =over 4 |
|
|
132 | |
|
|
133 | =item 595 - errors during connection etsbalishment, proxy handshake. |
|
|
134 | |
|
|
135 | =item 596 - errors during TLS negotiation, request sending and header processing. |
|
|
136 | |
|
|
137 | =item 597 - errors during body receiving or processing. |
|
|
138 | |
|
|
139 | =item 598 - user aborted request via C<on_header> or C<on_body>. |
|
|
140 | |
|
|
141 | =item 599 - other, usually nonretryable, errors (garbled URL etc.). |
|
|
142 | |
|
|
143 | =back |
130 | |
144 | |
131 | A typical callback might look like this: |
145 | A typical callback might look like this: |
132 | |
146 | |
133 | sub { |
147 | sub { |
134 | my ($body, $hdr) = @_; |
148 | my ($body, $hdr) = @_; |
… | |
… | |
419 | |
433 | |
420 | for ($set_cookie) { |
434 | for ($set_cookie) { |
421 | # parse NAME=VALUE |
435 | # parse NAME=VALUE |
422 | my @kv; |
436 | my @kv; |
423 | |
437 | |
|
|
438 | # expires is not http-compliant in the original cookie-spec, |
|
|
439 | # we support the official date format and some extensions |
424 | while ( |
440 | while ( |
425 | m{ |
441 | m{ |
426 | \G\s* |
442 | \G\s* |
427 | (?: |
443 | (?: |
428 | expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) |
444 | expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) |
429 | | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) |
445 | | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) |
430 | ) |
446 | ) |
431 | }gcxsi |
447 | }gcxsi |
432 | ) { |
448 | ) { |
433 | my $name = $2; |
449 | my $name = $2; |
… | |
… | |
600 | _get_slot $uhost, sub { |
616 | _get_slot $uhost, sub { |
601 | $state{slot_guard} = shift; |
617 | $state{slot_guard} = shift; |
602 | |
618 | |
603 | return unless $state{connect_guard}; |
619 | return unless $state{connect_guard}; |
604 | |
620 | |
|
|
621 | my $ae_error = 595; # connecting |
|
|
622 | |
605 | my $connect_cb = sub { |
623 | my $connect_cb = sub { |
606 | $state{fh} = shift |
624 | $state{fh} = shift |
607 | or do { |
625 | or do { |
608 | my $err = "$!"; |
626 | my $err = "$!"; |
609 | %state = (); |
627 | %state = (); |
610 | return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); |
628 | return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err }); |
611 | }; |
629 | }; |
612 | |
|
|
613 | pop; # free memory, save a tree |
|
|
614 | |
630 | |
615 | return unless delete $state{connect_guard}; |
631 | return unless delete $state{connect_guard}; |
616 | |
632 | |
617 | # get handle |
633 | # get handle |
618 | $state{handle} = new AnyEvent::Handle |
634 | $state{handle} = new AnyEvent::Handle |
… | |
… | |
621 | tls_ctx => $arg{tls_ctx}, |
637 | tls_ctx => $arg{tls_ctx}, |
622 | # these need to be reconfigured on keepalive handles |
638 | # these need to be reconfigured on keepalive handles |
623 | timeout => $timeout, |
639 | timeout => $timeout, |
624 | on_error => sub { |
640 | on_error => sub { |
625 | %state = (); |
641 | %state = (); |
626 | $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); |
642 | $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] }); |
627 | }, |
643 | }, |
628 | on_eof => sub { |
644 | on_eof => sub { |
629 | %state = (); |
645 | %state = (); |
630 | $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); |
646 | $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }); |
631 | }, |
647 | }, |
632 | ; |
648 | ; |
633 | |
649 | |
634 | # limit the number of persistent connections |
650 | # limit the number of persistent connections |
635 | # keepalive not yet supported |
651 | # keepalive not yet supported |
… | |
… | |
643 | |
659 | |
644 | $state{handle}->starttls ("connect") if $rscheme eq "https"; |
660 | $state{handle}->starttls ("connect") if $rscheme eq "https"; |
645 | |
661 | |
646 | # handle actual, non-tunneled, request |
662 | # handle actual, non-tunneled, request |
647 | my $handle_actual_request = sub { |
663 | my $handle_actual_request = sub { |
|
|
664 | $ae_error = 596; # request phase |
|
|
665 | |
648 | $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; |
666 | $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; |
649 | |
667 | |
650 | # send request |
668 | # send request |
651 | $state{handle}->push_write ( |
669 | $state{handle}->push_write ( |
652 | "$method $rpath HTTP/1.1\015\012" |
670 | "$method $rpath HTTP/1.1\015\012" |
… | |
… | |
663 | # status line and headers |
681 | # status line and headers |
664 | $state{read_response} = sub { |
682 | $state{read_response} = sub { |
665 | for ("$_[1]") { |
683 | for ("$_[1]") { |
666 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
684 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
667 | |
685 | |
668 | /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc |
686 | /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci |
669 | or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); |
687 | or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); |
670 | |
688 | |
671 | # 100 Continue handling |
689 | # 100 Continue handling |
672 | # should not happen as we don't send expect: 100-continue, |
690 | # should not happen as we don't send expect: 100-continue, |
673 | # but we handle it just in case. |
691 | # but we handle it just in case. |
… | |
… | |
708 | |
726 | |
709 | if ($recurse) { |
727 | if ($recurse) { |
710 | my $status = $hdr{Status}; |
728 | my $status = $hdr{Status}; |
711 | |
729 | |
712 | # industry standard is to redirect POST as GET for |
730 | # industry standard is to redirect POST as GET for |
713 | # 301, 302 and 303, in contrast to http/1.0 and 1.1. |
731 | # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1. |
714 | # also, the UA should ask the user for 301 and 307 and POST, |
732 | # also, the UA should ask the user for 301 and 307 and POST, |
715 | # industry standard seems to be to simply follow. |
733 | # industry standard seems to be to simply follow. |
716 | # we go with the industry standard. |
734 | # we go with the industry standard. |
717 | if ($status == 301 or $status == 302 or $status == 303) { |
735 | if ($status == 301 or $status == 302 or $status == 303) { |
718 | # HTTP/1.1 is unclear on how to mutate the method |
736 | # HTTP/1.1 is unclear on how to mutate the method |
… | |
… | |
751 | $cb); |
769 | $cb); |
752 | } else { |
770 | } else { |
753 | $cb->($_[0], \%hdr); |
771 | $cb->($_[0], \%hdr); |
754 | } |
772 | } |
755 | }; |
773 | }; |
|
|
774 | |
|
|
775 | $ae_error = 597; # body phase |
756 | |
776 | |
757 | my $len = $hdr{"content-length"}; |
777 | my $len = $hdr{"content-length"}; |
758 | |
778 | |
759 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
779 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
760 | $finish->(undef, 598 => "Request cancelled by on_header"); |
780 | $finish->(undef, 598 => "Request cancelled by on_header"); |
… | |
… | |
782 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
802 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
783 | my $cl = 0; |
803 | my $cl = 0; |
784 | my $body = undef; |
804 | my $body = undef; |
785 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
805 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
786 | |
806 | |
787 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
788 | |
|
|
789 | my $read_chunk; $read_chunk = sub { |
807 | my $read_chunk; $read_chunk = sub { |
790 | $_[1] =~ /^([0-9a-fA-F]+)/ |
808 | $_[1] =~ /^([0-9a-fA-F]+)/ |
791 | or $finish->(undef, 599 => "Garbled chunked transfer encoding"); |
809 | or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
792 | |
810 | |
793 | my $len = hex $1; |
811 | my $len = hex $1; |
794 | |
812 | |
795 | if ($len) { |
813 | if ($len) { |
796 | $cl += $len; |
814 | $cl += $len; |
… | |
… | |
799 | $on_body->($_[1], \%hdr) |
817 | $on_body->($_[1], \%hdr) |
800 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
818 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
801 | |
819 | |
802 | $_[0]->push_read (line => sub { |
820 | $_[0]->push_read (line => sub { |
803 | length $_[1] |
821 | length $_[1] |
804 | and return $finish->(undef, 599 => "Garbled chunked transfer encoding"); |
822 | and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
805 | $_[0]->push_read (line => $read_chunk); |
823 | $_[0]->push_read (line => $read_chunk); |
806 | }); |
824 | }); |
807 | }); |
825 | }); |
808 | } else { |
826 | } else { |
809 | $hdr{"content-length"} ||= $cl; |
827 | $hdr{"content-length"} ||= $cl; |
… | |
… | |
812 | if (length $_[1]) { |
830 | if (length $_[1]) { |
813 | for ("$_[1]") { |
831 | for ("$_[1]") { |
814 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
832 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
815 | |
833 | |
816 | my $hdr = parse_hdr |
834 | my $hdr = parse_hdr |
817 | or return $finish->(undef, 599 => "Garbled response trailers"); |
835 | or return $finish->(undef, $ae_error => "Garbled response trailers"); |
818 | |
836 | |
819 | %hdr = (%hdr, %$hdr); |
837 | %hdr = (%hdr, %$hdr); |
820 | } |
838 | } |
821 | } |
839 | } |
822 | |
840 | |
… | |
… | |
826 | }; |
844 | }; |
827 | |
845 | |
828 | $_[0]->push_read (line => $read_chunk); |
846 | $_[0]->push_read (line => $read_chunk); |
829 | |
847 | |
830 | } elsif ($arg{on_body}) { |
848 | } elsif ($arg{on_body}) { |
831 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
832 | |
|
|
833 | if ($len) { |
849 | if ($len) { |
834 | $_[0]->on_read (sub { |
850 | $_[0]->on_read (sub { |
835 | $len -= length $_[0]{rbuf}; |
851 | $len -= length $_[0]{rbuf}; |
836 | |
852 | |
837 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
853 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
… | |
… | |
851 | } |
867 | } |
852 | } else { |
868 | } else { |
853 | $_[0]->on_eof (undef); |
869 | $_[0]->on_eof (undef); |
854 | |
870 | |
855 | if ($len) { |
871 | if ($len) { |
856 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
857 | $_[0]->on_read (sub { |
872 | $_[0]->on_read (sub { |
858 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
873 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
859 | if $len <= length $_[0]{rbuf}; |
874 | if $len <= length $_[0]{rbuf}; |
860 | }); |
875 | }); |
861 | } else { |
876 | } else { |
862 | $_[0]->on_error (sub { |
877 | $_[0]->on_error (sub { |
863 | ($! == Errno::EPIPE || !$!) |
878 | ($! == Errno::EPIPE || !$!) |
864 | ? $finish->(delete $_[0]{rbuf}) |
879 | ? $finish->(delete $_[0]{rbuf}) |
865 | : $finish->(undef, 599 => $_[2]); |
880 | : $finish->(undef, $ae_error => $_[2]); |
866 | }); |
881 | }); |
867 | $_[0]->on_read (sub { }); |
882 | $_[0]->on_read (sub { }); |
868 | } |
883 | } |
869 | } |
884 | } |
870 | } |
885 | } |
… | |
… | |
950 | Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP |
965 | Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP |
951 | Date (RFC 2616). |
966 | Date (RFC 2616). |
952 | |
967 | |
953 | =item $timestamp = AnyEvent::HTTP::parse_date $date |
968 | =item $timestamp = AnyEvent::HTTP::parse_date $date |
954 | |
969 | |
955 | Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and |
970 | Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a |
956 | returns the corresponding POSIX timestamp, or C<undef> if the date cannot |
971 | bunch of minor variations of those, and returns the corresponding POSIX |
957 | be parsed. |
972 | timestamp, or C<undef> if the date cannot be parsed. |
958 | |
973 | |
959 | =item $AnyEvent::HTTP::MAX_RECURSE |
974 | =item $AnyEvent::HTTP::MAX_RECURSE |
960 | |
975 | |
961 | The default value for the C<recurse> request parameter (default: C<10>). |
976 | The default value for the C<recurse> request parameter (default: C<10>). |
962 | |
977 | |
… | |
… | |
1001 | sub parse_date($) { |
1016 | sub parse_date($) { |
1002 | my ($date) = @_; |
1017 | my ($date) = @_; |
1003 | |
1018 | |
1004 | my ($d, $m, $y, $H, $M, $S); |
1019 | my ($d, $m, $y, $H, $M, $S); |
1005 | |
1020 | |
1006 | if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { |
1021 | if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
1007 | # RFC 822/1123, required by RFC 2616 (with " ") |
1022 | # RFC 822/1123, required by RFC 2616 (with " ") |
1008 | # cookie dates (with "-") |
1023 | # cookie dates (with "-") |
1009 | |
1024 | |
1010 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); |
1025 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); |
1011 | |
1026 | |
1012 | } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { |
1027 | } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
1013 | # RFC 850 |
1028 | # RFC 850 |
1014 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); |
1029 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); |
1015 | |
1030 | |
1016 | } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { |
1031 | } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) { |
1017 | # ISO C's asctime |
1032 | # ISO C's asctime |
1018 | ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); |
1033 | ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); |
1019 | } |
1034 | } |
1020 | # other formats fail in the loop below |
1035 | # other formats fail in the loop below |
1021 | |
1036 | |