… | |
… | |
122 | |
122 | |
123 | If the server sends a header multiple times, then their contents will be |
123 | If the server sends a header multiple times, then their contents will be |
124 | joined together with a comma (C<,>), as per the HTTP spec. |
124 | joined together with a comma (C<,>), as per the HTTP spec. |
125 | |
125 | |
126 | If an internal error occurs, such as not being able to resolve a hostname, |
126 | If an internal error occurs, such as not being able to resolve a hostname, |
127 | then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> |
127 | then C<$data> will be C<undef>, C<< $headers->{Status} >> will be |
128 | (usually C<599>) and the C<Reason> pseudo-header will contain an error |
128 | C<590>-C<599> and the C<Reason> pseudo-header will contain an error |
129 | message. |
129 | message. Currently the following status codes are used: |
|
|
130 | |
|
|
131 | =over 4 |
|
|
132 | |
|
|
133 | =item 595 - errors during connection etsbalishment, proxy handshake. |
|
|
134 | |
|
|
135 | =item 596 - errors during TLS negotiation, request sending and header processing. |
|
|
136 | |
|
|
137 | =item 597 - errors during body receiving or processing. |
|
|
138 | |
|
|
139 | =item 598 - user aborted request via C<on_header> or C<on_body>. |
|
|
140 | |
|
|
141 | =item 599 - other, usually nonretryable, errors (garbled URL etc.). |
|
|
142 | |
|
|
143 | =back |
130 | |
144 | |
131 | A typical callback might look like this: |
145 | A typical callback might look like this: |
132 | |
146 | |
133 | sub { |
147 | sub { |
134 | my ($body, $hdr) = @_; |
148 | my ($body, $hdr) = @_; |
… | |
… | |
412 | |
426 | |
413 | \@cookies |
427 | \@cookies |
414 | } |
428 | } |
415 | |
429 | |
416 | # parse set_cookie header into jar |
430 | # parse set_cookie header into jar |
417 | sub cookie_jar_set_cookie($$) { |
431 | sub cookie_jar_set_cookie($$$) { |
418 | my ($jar, $set_cookie) = @_; |
432 | my ($jar, $set_cookie, $uhost) = @_; |
419 | |
433 | |
420 | for ($set_cookie) { |
434 | for ($set_cookie) { |
421 | # parse NAME=VALUE |
435 | # parse NAME=VALUE |
422 | my @kv; |
436 | my @kv; |
423 | |
437 | |
|
|
438 | # expires is not http-compliant in the original cookie-spec, |
|
|
439 | # we support the official date format and some extensions |
424 | while ( |
440 | while ( |
425 | m{ |
441 | m{ |
426 | \G\s* |
442 | \G\s* |
427 | (?: |
443 | (?: |
428 | expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) |
444 | expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+) |
429 | | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) |
445 | | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) |
430 | ) |
446 | ) |
431 | }gcxsi |
447 | }gcxsi |
432 | ) { |
448 | ) { |
433 | my $name = $2; |
449 | my $name = $2; |
… | |
… | |
472 | } else { |
488 | } else { |
473 | $cdom = $uhost; |
489 | $cdom = $uhost; |
474 | } |
490 | } |
475 | |
491 | |
476 | # store it |
492 | # store it |
477 | $arg{cookie_jar}{version} = 1; |
493 | $jar->{version} = 1; |
478 | $arg{cookie_jar}{$cdom}{$cpath}{$name} = \%kv; |
494 | $jar->{$cdom}{$cpath}{$name} = \%kv; |
479 | |
495 | |
480 | redo if /\G\s*,/gc; |
496 | redo if /\G\s*,/gc; |
481 | } |
497 | } |
482 | } |
|
|
483 | } |
498 | } |
484 | |
499 | |
485 | # continue to parse $_ for headers and place them into the arg |
500 | # continue to parse $_ for headers and place them into the arg |
486 | sub parse_hdr() { |
501 | sub parse_hdr() { |
487 | my %hdr; |
502 | my %hdr; |
… | |
… | |
601 | _get_slot $uhost, sub { |
616 | _get_slot $uhost, sub { |
602 | $state{slot_guard} = shift; |
617 | $state{slot_guard} = shift; |
603 | |
618 | |
604 | return unless $state{connect_guard}; |
619 | return unless $state{connect_guard}; |
605 | |
620 | |
|
|
621 | my $ae_error = 595; # connecting |
|
|
622 | |
606 | my $connect_cb = sub { |
623 | my $connect_cb = sub { |
607 | $state{fh} = shift |
624 | $state{fh} = shift |
608 | or do { |
625 | or do { |
609 | my $err = "$!"; |
626 | my $err = "$!"; |
610 | %state = (); |
627 | %state = (); |
611 | return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); |
628 | return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err }); |
612 | }; |
629 | }; |
613 | |
|
|
614 | pop; # free memory, save a tree |
|
|
615 | |
630 | |
616 | return unless delete $state{connect_guard}; |
631 | return unless delete $state{connect_guard}; |
617 | |
632 | |
618 | # get handle |
633 | # get handle |
619 | $state{handle} = new AnyEvent::Handle |
634 | $state{handle} = new AnyEvent::Handle |
… | |
… | |
622 | tls_ctx => $arg{tls_ctx}, |
637 | tls_ctx => $arg{tls_ctx}, |
623 | # these need to be reconfigured on keepalive handles |
638 | # these need to be reconfigured on keepalive handles |
624 | timeout => $timeout, |
639 | timeout => $timeout, |
625 | on_error => sub { |
640 | on_error => sub { |
626 | %state = (); |
641 | %state = (); |
627 | $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); |
642 | $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] }); |
628 | }, |
643 | }, |
629 | on_eof => sub { |
644 | on_eof => sub { |
630 | %state = (); |
645 | %state = (); |
631 | $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); |
646 | $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" }); |
632 | }, |
647 | }, |
633 | ; |
648 | ; |
634 | |
649 | |
635 | # limit the number of persistent connections |
650 | # limit the number of persistent connections |
636 | # keepalive not yet supported |
651 | # keepalive not yet supported |
… | |
… | |
644 | |
659 | |
645 | $state{handle}->starttls ("connect") if $rscheme eq "https"; |
660 | $state{handle}->starttls ("connect") if $rscheme eq "https"; |
646 | |
661 | |
647 | # handle actual, non-tunneled, request |
662 | # handle actual, non-tunneled, request |
648 | my $handle_actual_request = sub { |
663 | my $handle_actual_request = sub { |
|
|
664 | $ae_error = 596; # request phase |
|
|
665 | |
649 | $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; |
666 | $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; |
650 | |
667 | |
651 | # send request |
668 | # send request |
652 | $state{handle}->push_write ( |
669 | $state{handle}->push_write ( |
653 | "$method $rpath HTTP/1.1\015\012" |
670 | "$method $rpath HTTP/1.1\015\012" |
… | |
… | |
664 | # status line and headers |
681 | # status line and headers |
665 | $state{read_response} = sub { |
682 | $state{read_response} = sub { |
666 | for ("$_[1]") { |
683 | for ("$_[1]") { |
667 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
684 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
668 | |
685 | |
669 | /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc |
686 | /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci |
670 | or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); |
687 | or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); |
671 | |
688 | |
672 | # 100 Continue handling |
689 | # 100 Continue handling |
673 | # should not happen as we don't send expect: 100-continue, |
690 | # should not happen as we don't send expect: 100-continue, |
674 | # but we handle it just in case. |
691 | # but we handle it just in case. |
… | |
… | |
709 | |
726 | |
710 | if ($recurse) { |
727 | if ($recurse) { |
711 | my $status = $hdr{Status}; |
728 | my $status = $hdr{Status}; |
712 | |
729 | |
713 | # industry standard is to redirect POST as GET for |
730 | # industry standard is to redirect POST as GET for |
714 | # 301, 302 and 303, in contrast to http/1.0 and 1.1. |
731 | # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1. |
715 | # also, the UA should ask the user for 301 and 307 and POST, |
732 | # also, the UA should ask the user for 301 and 307 and POST, |
716 | # industry standard seems to be to simply follow. |
733 | # industry standard seems to be to simply follow. |
717 | # we go with the industry standard. |
734 | # we go with the industry standard. |
718 | if ($status == 301 or $status == 302 or $status == 303) { |
735 | if ($status == 301 or $status == 302 or $status == 303) { |
719 | # HTTP/1.1 is unclear on how to mutate the method |
736 | # HTTP/1.1 is unclear on how to mutate the method |
… | |
… | |
723 | $redirect = 1; |
740 | $redirect = 1; |
724 | } |
741 | } |
725 | } |
742 | } |
726 | |
743 | |
727 | my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) |
744 | my $finish = sub { # ($data, $err_status, $err_reason[, $keepalive]) |
728 | my $keepalive = pop; |
745 | my $may_keep_alive = $_[3]; |
729 | |
746 | |
730 | $state{handle}->destroy if $state{handle}; |
747 | $state{handle}->destroy if $state{handle}; |
731 | %state = (); |
748 | %state = (); |
732 | |
749 | |
733 | if (defined $_[1]) { |
750 | if (defined $_[1]) { |
… | |
… | |
735 | $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; |
752 | $hdr{OrigReason} = $hdr{Reason}; $hdr{Reason} = $_[2]; |
736 | } |
753 | } |
737 | |
754 | |
738 | # set-cookie processing |
755 | # set-cookie processing |
739 | if ($arg{cookie_jar}) { |
756 | if ($arg{cookie_jar}) { |
740 | cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}; |
757 | cookie_jar_set_cookie $arg{cookie_jar}, $hdr{"set-cookie"}, $uhost; |
|
|
758 | } |
741 | |
759 | |
742 | if ($redirect && exists $hdr{location}) { |
760 | if ($redirect && exists $hdr{location}) { |
743 | # we ignore any errors, as it is very common to receive |
761 | # we ignore any errors, as it is very common to receive |
744 | # Content-Length != 0 but no actual body |
762 | # Content-Length != 0 but no actual body |
745 | # we also access %hdr, as $_[1] might be an erro |
763 | # we also access %hdr, as $_[1] might be an erro |
… | |
… | |
751 | $cb); |
769 | $cb); |
752 | } else { |
770 | } else { |
753 | $cb->($_[0], \%hdr); |
771 | $cb->($_[0], \%hdr); |
754 | } |
772 | } |
755 | }; |
773 | }; |
|
|
774 | |
|
|
775 | $ae_error = 597; # body phase |
756 | |
776 | |
757 | my $len = $hdr{"content-length"}; |
777 | my $len = $hdr{"content-length"}; |
758 | |
778 | |
759 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
779 | if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { |
760 | $finish->(undef, 598 => "Request cancelled by on_header"); |
780 | $finish->(undef, 598 => "Request cancelled by on_header"); |
… | |
… | |
782 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
802 | } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { |
783 | my $cl = 0; |
803 | my $cl = 0; |
784 | my $body = undef; |
804 | my $body = undef; |
785 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
805 | my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; |
786 | |
806 | |
787 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
788 | |
|
|
789 | my $read_chunk; $read_chunk = sub { |
807 | my $read_chunk; $read_chunk = sub { |
790 | $_[1] =~ /^([0-9a-fA-F]+)/ |
808 | $_[1] =~ /^([0-9a-fA-F]+)/ |
791 | or $finish->(undef, 599 => "Garbled chunked transfer encoding"); |
809 | or $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
792 | |
810 | |
793 | my $len = hex $1; |
811 | my $len = hex $1; |
794 | |
812 | |
795 | if ($len) { |
813 | if ($len) { |
796 | $cl += $len; |
814 | $cl += $len; |
… | |
… | |
799 | $on_body->($_[1], \%hdr) |
817 | $on_body->($_[1], \%hdr) |
800 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
818 | or return $finish->(undef, 598 => "Request cancelled by on_body"); |
801 | |
819 | |
802 | $_[0]->push_read (line => sub { |
820 | $_[0]->push_read (line => sub { |
803 | length $_[1] |
821 | length $_[1] |
804 | and return $finish->(undef, 599 => "Garbled chunked transfer encoding"); |
822 | and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding"); |
805 | $_[0]->push_read (line => $read_chunk); |
823 | $_[0]->push_read (line => $read_chunk); |
806 | }); |
824 | }); |
807 | }); |
825 | }); |
808 | } else { |
826 | } else { |
809 | $hdr{"content-length"} ||= $cl; |
827 | $hdr{"content-length"} ||= $cl; |
… | |
… | |
812 | if (length $_[1]) { |
830 | if (length $_[1]) { |
813 | for ("$_[1]") { |
831 | for ("$_[1]") { |
814 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
832 | y/\015//d; # weed out any \015, as they show up in the weirdest of places. |
815 | |
833 | |
816 | my $hdr = parse_hdr |
834 | my $hdr = parse_hdr |
817 | or return $finish->(undef, 599 => "Garbled response trailers"); |
835 | or return $finish->(undef, $ae_error => "Garbled response trailers"); |
818 | |
836 | |
819 | %hdr = (%hdr, %$hdr); |
837 | %hdr = (%hdr, %$hdr); |
820 | } |
838 | } |
821 | } |
839 | } |
822 | |
840 | |
… | |
… | |
826 | }; |
844 | }; |
827 | |
845 | |
828 | $_[0]->push_read (line => $read_chunk); |
846 | $_[0]->push_read (line => $read_chunk); |
829 | |
847 | |
830 | } elsif ($arg{on_body}) { |
848 | } elsif ($arg{on_body}) { |
831 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
832 | |
|
|
833 | if ($len) { |
849 | if ($len) { |
834 | $_[0]->on_read (sub { |
850 | $_[0]->on_read (sub { |
835 | $len -= length $_[0]{rbuf}; |
851 | $len -= length $_[0]{rbuf}; |
836 | |
852 | |
837 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
853 | $arg{on_body}(delete $_[0]{rbuf}, \%hdr) |
… | |
… | |
851 | } |
867 | } |
852 | } else { |
868 | } else { |
853 | $_[0]->on_eof (undef); |
869 | $_[0]->on_eof (undef); |
854 | |
870 | |
855 | if ($len) { |
871 | if ($len) { |
856 | $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) }); |
|
|
857 | $_[0]->on_read (sub { |
872 | $_[0]->on_read (sub { |
858 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
873 | $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) |
859 | if $len <= length $_[0]{rbuf}; |
874 | if $len <= length $_[0]{rbuf}; |
860 | }); |
875 | }); |
861 | } else { |
876 | } else { |
862 | $_[0]->on_error (sub { |
877 | $_[0]->on_error (sub { |
863 | ($! == Errno::EPIPE || !$!) |
878 | ($! == Errno::EPIPE || !$!) |
864 | ? $finish->(delete $_[0]{rbuf}) |
879 | ? $finish->(delete $_[0]{rbuf}) |
865 | : $finish->(undef, 599 => $_[2]); |
880 | : $finish->(undef, $ae_error => $_[2]); |
866 | }); |
881 | }); |
867 | $_[0]->on_read (sub { }); |
882 | $_[0]->on_read (sub { }); |
868 | } |
883 | } |
869 | } |
884 | } |
870 | } |
885 | } |
… | |
… | |
950 | Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP |
965 | Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP |
951 | Date (RFC 2616). |
966 | Date (RFC 2616). |
952 | |
967 | |
953 | =item $timestamp = AnyEvent::HTTP::parse_date $date |
968 | =item $timestamp = AnyEvent::HTTP::parse_date $date |
954 | |
969 | |
955 | Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and |
970 | Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a |
956 | returns the corresponding POSIX timestamp, or C<undef> if the date cannot |
971 | bunch of minor variations of those, and returns the corresponding POSIX |
957 | be parsed. |
972 | timestamp, or C<undef> if the date cannot be parsed. |
958 | |
973 | |
959 | =item $AnyEvent::HTTP::MAX_RECURSE |
974 | =item $AnyEvent::HTTP::MAX_RECURSE |
960 | |
975 | |
961 | The default value for the C<recurse> request parameter (default: C<10>). |
976 | The default value for the C<recurse> request parameter (default: C<10>). |
962 | |
977 | |
… | |
… | |
1001 | sub parse_date($) { |
1016 | sub parse_date($) { |
1002 | my ($date) = @_; |
1017 | my ($date) = @_; |
1003 | |
1018 | |
1004 | my ($d, $m, $y, $H, $M, $S); |
1019 | my ($d, $m, $y, $H, $M, $S); |
1005 | |
1020 | |
1006 | if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { |
1021 | if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
1007 | # RFC 822/1123, required by RFC 2616 (with " ") |
1022 | # RFC 822/1123, required by RFC 2616 (with " ") |
1008 | # cookie dates (with "-") |
1023 | # cookie dates (with "-") |
1009 | |
1024 | |
1010 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); |
1025 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); |
1011 | |
1026 | |
1012 | } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { |
1027 | } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) { |
1013 | # RFC 850 |
1028 | # RFC 850 |
1014 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); |
1029 | ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); |
1015 | |
1030 | |
1016 | } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { |
1031 | } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) { |
1017 | # ISO C's asctime |
1032 | # ISO C's asctime |
1018 | ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); |
1033 | ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); |
1019 | } |
1034 | } |
1020 | # other formats fail in the loop below |
1035 | # other formats fail in the loop below |
1021 | |
1036 | |