… | |
… | |
122 | |
122 | |
123 | If the server sends a header multiple times, then their contents will be |
123 | If the server sends a header multiple times, then their contents will be |
124 | joined together with a comma (C<,>), as per the HTTP spec. |
124 | joined together with a comma (C<,>), as per the HTTP spec. |
125 | |
125 | |
126 | If an internal error occurs, such as not being able to resolve a hostname, |
126 | If an internal error occurs, such as not being able to resolve a hostname, |
127 | then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> |
127 | then C<$data> will be C<undef>, C<< $headers->{Status} >> will be |
128 | (usually C<599>) and the C<Reason> pseudo-header will contain an error |
128 | C<590>-C<599> and the C<Reason> pseudo-header will contain an error |
129 | message. |
129 | message. Currently the following status codes are used: |
|
|
130 | |
|
|
131 | =over 4 |
|
|
132 | |
|
|
133 | =item 595 - errors during connection etsbalishment, proxy handshake. |
|
|
134 | |
|
|
135 | =item 596 - errors during TLS negotiation, request sending and header processing. |
|
|
136 | |
|
|
137 | =item 597 - errors during body receiving or processing. |
|
|
138 | |
|
|
139 | =item 598 - user aborted request via C<on_header> or C<on_body>. |
|
|
140 | |
|
|
141 | =item 599 - other, usually nonretryable, errors (garbled URL etc.). |
|
|
142 | |
|
|
143 | =back |
130 | |
144 | |
131 | A typical callback might look like this: |
145 | A typical callback might look like this: |
132 | |
146 | |
133 | sub { |
147 | sub { |
134 | my ($body, $hdr) = @_; |
148 | my ($body, $hdr) = @_; |
… | |
… | |
419 | |
433 | |
420 | for ($set_cookie) { |
434 | for ($set_cookie) { |
421 | # parse NAME=VALUE |
435 | # parse NAME=VALUE |
422 | my @kv; |
436 | my @kv; |
423 | |
437 | |
|
|
438 | # expires is not http-compliant in the original cookie-spec, |
|
|
439 | # we support the official date format and some extensions |
424 | while ( |
440 | while ( |
425 | m{ |
441 | m{ |
426 | \G\s* |
442 | \G\s* |
427 | (?: |
443 | (?: |
428 | expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) |
444 | expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) |
429 | | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) |
445 | | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) |
430 | ) |
446 | ) |
431 | }gcxsi |
447 | }gcxsi |
432 | ) { |
448 | ) { |
433 | my $name = $2; |
449 | my $name = $2; |
… | |
… | |
600 | _get_slot $uhost, sub { |
616 | _get_slot $uhost, sub { |
601 | $state{slot_guard} = shift; |
617 | $state{slot_guard} = shift; |
602 | |
618 | |
603 | return unless $state{connect_guard}; |
619 | return unless $state{connect_guard}; |
604 | |
620 | |
|
|
621 | my $ae_error = 595; # connecting |
|
|
622 | |
605 | my $connect_cb = sub { |
623 | my $connect_cb = sub { |
606 | $state{fh} = shift |
624 | $state{fh} = shift |
607 | or do { |
625 | or do { |
608 | my $err = "$!"; |
626 | my $err = "$!"; |
609 | %state = (); |
627 | %state = (); |
610 | return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); |
628 | return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err }); |
611 | }; |
629 | }; |
612 | |
630 | |
613 | return unless delete $state{connect_guard}; |
631 | return unless delete $state{connect_guard}; |
614 | |
632 | |
615 | # get handle |
633 | # get handle |
… | |
… | |
619 | tls_ctx => $arg{tls_ctx}, |
637 | tls_ctx => $arg{tls_ctx}, |
620 | # these need to be reconfigured on keepalive handles |
638 | # these need to be reconfigured on keepalive handles |
621 | timeout => $timeout, |
639 | timeout => $timeout, |
622 | on_error => sub { |
640 | on_error => sub { |
623 | %state = (); |
641 | %state = (); |
624 | $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); |
642 | $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] }); |
625 | }, |
643 | }, |
626 | on_eof => sub { |
644 | on_eof => sub { |
627 | %state = (); |
645 | %state = (); |
628 | $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); |
646 | $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }); |
629 | }, |
647 | }, |
630 | ; |
648 | ; |
631 | |
649 | |
632 | # limit the number of persistent connections |
650 | # limit the number of persistent connections |
633 | # keepalive not yet supported |
651 | # keepalive not yet supported |
… | |
… | |
641 | |
659 | |
642 | $state{handle}->starttls ("connect") if $rscheme eq "https"; |
660 | $state{handle}->starttls ("connect") if $rscheme eq "https"; |
643 | |
661 | |
644 | # handle actual, non-tunneled, request |
662 | # handle actual, non-tunneled, request |
645 | my $handle_actual_request = sub { |
663 | my $handle_actual_request = sub { |
|
|
664 | $ae_error = 596; # request phase |
|
|
665 | |
646 | $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; |
666 | $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; |
647 | |
667 | |
648 | # send request |
668 | # send request |
649 | $state{handle}->push_write ( |
669 | $state{handle}->push_write ( |
650 | "$method $rpath HTTP/1.1\015\012" |
670 | "$method $rpath HTTP/1.1\015\012" |
… | |
… | |
749 | $cb); |
769 | $cb); |
750 | } else { |
770 | } else { |
751 | $cb->($_[0], \%hdr); |
771 | $cb->($_[0], \%hdr); |
752 | } |
772 | } |
753 | }; |
773 | }; |
|
|
774 | |
|
|
775 | $ae_error = 597; # body phase |
754 | |
776 | |
755 | my $len = $hdr{"content-length"}; |
777 | my $len = $hdr{"content-length"}; |
756 | |
778 | |
757 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
779 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
758 | $finish->(undef, 598 => "Request cancelled by on_header"); |
780 | $finish->(undef, 598 => "Request cancelled by on_header"); |
… | |
… | |
780 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
802 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
781 | my $cl = 0; |
803 | my $cl = 0; |
782 | my $body = undef; |
804 | my $body = undef; |
783 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
805 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
784 | |
806 | |
785 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
786 | |
|
|
787 | my $read_chunk; $read_chunk = sub { |
807 | my $read_chunk; $read_chunk = sub { |
788 | $_[1] =~ /^([0-9a-fA-F]+)/ |
808 | $_[1] =~ /^([0-9a-fA-F]+)/ |
789 | or $finish->(undef, 599 => "Garbled chunked transfer encoding"); |
809 | or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
790 | |
810 | |
791 | my $len = hex $1; |
811 | my $len = hex $1; |
792 | |
812 | |
793 | if ($len) { |
813 | if ($len) { |
794 | $cl += $len; |
814 | $cl += $len; |
… | |
… | |
797 | $on_body->($_[1], \%hdr) |
817 | $on_body->($_[1], \%hdr) |
798 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
818 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
799 | |
819 | |
800 | $_[0]->push_read (line => sub { |
820 | $_[0]->push_read (line => sub { |
801 | length $_[1] |
821 | length $_[1] |
802 | and return $finish->(undef, 599 => "Garbled chunked transfer encoding"); |
822 | and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
803 | $_[0]->push_read (line => $read_chunk); |
823 | $_[0]->push_read (line => $read_chunk); |
804 | }); |
824 | }); |
805 | }); |
825 | }); |
806 | } else { |
826 | } else { |
807 | $hdr{"content-length"} ||= $cl; |
827 | $hdr{"content-length"} ||= $cl; |
… | |
… | |
810 | if (length $_[1]) { |
830 | if (length $_[1]) { |
811 | for ("$_[1]") { |
831 | for ("$_[1]") { |
812 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
832 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
813 | |
833 | |
814 | my $hdr = parse_hdr |
834 | my $hdr = parse_hdr |
815 | or return $finish->(undef, 599 => "Garbled response trailers"); |
835 | or return $finish->(undef, $ae_error => "Garbled response trailers"); |
816 | |
836 | |
817 | %hdr = (%hdr, %$hdr); |
837 | %hdr = (%hdr, %$hdr); |
818 | } |
838 | } |
819 | } |
839 | } |
820 | |
840 | |
… | |
… | |
824 | }; |
844 | }; |
825 | |
845 | |
826 | $_[0]->push_read (line => $read_chunk); |
846 | $_[0]->push_read (line => $read_chunk); |
827 | |
847 | |
828 | } elsif ($arg{on_body}) { |
848 | } elsif ($arg{on_body}) { |
829 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
830 | |
|
|
831 | if ($len) { |
849 | if ($len) { |
832 | $_[0]->on_read (sub { |
850 | $_[0]->on_read (sub { |
833 | $len -= length $_[0]{rbuf}; |
851 | $len -= length $_[0]{rbuf}; |
834 | |
852 | |
835 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
853 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
… | |
… | |
849 | } |
867 | } |
850 | } else { |
868 | } else { |
851 | $_[0]->on_eof (undef); |
869 | $_[0]->on_eof (undef); |
852 | |
870 | |
853 | if ($len) { |
871 | if ($len) { |
854 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
855 | $_[0]->on_read (sub { |
872 | $_[0]->on_read (sub { |
856 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
873 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
857 | if $len <= length $_[0]{rbuf}; |
874 | if $len <= length $_[0]{rbuf}; |
858 | }); |
875 | }); |
859 | } else { |
876 | } else { |
860 | $_[0]->on_error (sub { |
877 | $_[0]->on_error (sub { |
861 | ($! == Errno::EPIPE || !$!) |
878 | ($! == Errno::EPIPE || !$!) |
862 | ? $finish->(delete $_[0]{rbuf}) |
879 | ? $finish->(delete $_[0]{rbuf}) |
863 | : $finish->(undef, 599 => $_[2]); |
880 | : $finish->(undef, $ae_error => $_[2]); |
864 | }); |
881 | }); |
865 | $_[0]->on_read (sub { }); |
882 | $_[0]->on_read (sub { }); |
866 | } |
883 | } |
867 | } |
884 | } |
868 | } |
885 | } |
… | |
… | |
948 | Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP |
965 | Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP |
949 | Date (RFC 2616). |
966 | Date (RFC 2616). |
950 | |
967 | |
951 | =item $timestamp = AnyEvent::HTTP::parse_date $date |
968 | =item $timestamp = AnyEvent::HTTP::parse_date $date |
952 | |
969 | |
953 | Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and |
970 | Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a |
954 | returns the corresponding POSIX timestamp, or C<undef> if the date cannot |
971 | bunch of minor variations of those, and returns the corresponding POSIX |
955 | be parsed. |
972 | timestamp, or C<undef> if the date cannot be parsed. |
956 | |
973 | |
957 | =item $AnyEvent::HTTP::MAX_RECURSE |
974 | =item $AnyEvent::HTTP::MAX_RECURSE |
958 | |
975 | |
959 | The default value for the C<recurse> request parameter (default: C<10>). |
976 | The default value for the C<recurse> request parameter (default: C<10>). |
960 | |
977 | |
… | |
… | |
999 | sub parse_date($) { |
1016 | sub parse_date($) { |
1000 | my ($date) = @_; |
1017 | my ($date) = @_; |
1001 | |
1018 | |
1002 | my ($d, $m, $y, $H, $M, $S); |
1019 | my ($d, $m, $y, $H, $M, $S); |
1003 | |
1020 | |
1004 | if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { |
1021 | if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
1005 | # RFC 822/1123, required by RFC 2616 (with " ") |
1022 | # RFC 822/1123, required by RFC 2616 (with " ") |
1006 | # cookie dates (with "-") |
1023 | # cookie dates (with "-") |
1007 | |
1024 | |
1008 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); |
1025 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); |
1009 | |
1026 | |
1010 | } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { |
1027 | } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
1011 | # RFC 850 |
1028 | # RFC 850 |
1012 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); |
1029 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); |
1013 | |
1030 | |
1014 | } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { |
1031 | } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) { |
1015 | # ISO C's asctime |
1032 | # ISO C's asctime |
1016 | ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); |
1033 | ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); |
1017 | } |
1034 | } |
1018 | # other formats fail in the loop below |
1035 | # other formats fail in the loop below |
1019 | |
1036 | |