ViewVC Help
View File | Revision Log | Show Annotations | Download File
/cvs/AnyEvent-HTTP/HTTP.pm
(Generate patch)

Comparing AnyEvent-HTTP/HTTP.pm (file contents):
Revision 1.75 by root, Sat Jan 1 00:08:51 2011 UTC vs.
Revision 1.79 by root, Sat Jan 1 20:01:07 2011 UTC

122 122
123If the server sends a header multiple times, then their contents will be 123If the server sends a header multiple times, then their contents will be
124joined together with a comma (C<,>), as per the HTTP spec. 124joined together with a comma (C<,>), as per the HTTP spec.
125 125
126If an internal error occurs, such as not being able to resolve a hostname, 126If an internal error occurs, such as not being able to resolve a hostname,
127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be C<59x> 127then C<$data> will be C<undef>, C<< $headers->{Status} >> will be
128(usually C<599>) and the C<Reason> pseudo-header will contain an error 128C<590>-C<599> and the C<Reason> pseudo-header will contain an error
129message. 129message. Currently the following status codes are used:
130
131=over 4
132
133=item 595 - errors during connection etsbalishment, proxy handshake.
134
135=item 596 - errors during TLS negotiation, request sending and header processing.
136
137=item 597 - errors during body receiving or processing.
138
139=item 598 - user aborted request via C<on_header> or C<on_body>.
140
141=item 599 - other, usually nonretryable, errors (garbled URL etc.).
142
143=back
130 144
131A typical callback might look like this: 145A typical callback might look like this:
132 146
133 sub { 147 sub {
134 my ($body, $hdr) = @_; 148 my ($body, $hdr) = @_;
419 433
420 for ($set_cookie) { 434 for ($set_cookie) {
421 # parse NAME=VALUE 435 # parse NAME=VALUE
422 my @kv; 436 my @kv;
423 437
438 # expires is not http-compliant in the original cookie-spec,
439 # we support the official date format and some extensions
424 while ( 440 while (
425 m{ 441 m{
426 \G\s* 442 \G\s*
427 (?: 443 (?:
428 expires \s*=\s* ([A-Z][a-z][a-z],\ [^,;]+) 444 expires \s*=\s* ([A-Z][a-z][a-z]+,\ [^,;]+)
429 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) ) 445 | ([^=;,[:space:]]+) \s*=\s* (?: "((?:[^\\"]+|\\.)*)" | ([^=;,[:space:]]*) )
430 ) 446 )
431 }gcxsi 447 }gcxsi
432 ) { 448 ) {
433 my $name = $2; 449 my $name = $2;
600 _get_slot $uhost, sub { 616 _get_slot $uhost, sub {
601 $state{slot_guard} = shift; 617 $state{slot_guard} = shift;
602 618
603 return unless $state{connect_guard}; 619 return unless $state{connect_guard};
604 620
621 my $ae_error = 595; # connecting
622
605 my $connect_cb = sub { 623 my $connect_cb = sub {
606 $state{fh} = shift 624 $state{fh} = shift
607 or do { 625 or do {
608 my $err = "$!"; 626 my $err = "$!";
609 %state = (); 627 %state = ();
610 return $cb->(undef, { @pseudo, Status => 599, Reason => $err }); 628 return $cb->(undef, { @pseudo, Status => $ae_error, Reason => $err });
611 }; 629 };
612 630
613 return unless delete $state{connect_guard}; 631 return unless delete $state{connect_guard};
614 632
615 # get handle 633 # get handle
619 tls_ctx => $arg{tls_ctx}, 637 tls_ctx => $arg{tls_ctx},
620 # these need to be reconfigured on keepalive handles 638 # these need to be reconfigured on keepalive handles
621 timeout => $timeout, 639 timeout => $timeout,
622 on_error => sub { 640 on_error => sub {
623 %state = (); 641 %state = ();
624 $cb->(undef, { @pseudo, Status => 599, Reason => $_[2] }); 642 $cb->(undef, { @pseudo, Status => $ae_error, Reason => $_[2] });
625 }, 643 },
626 on_eof => sub { 644 on_eof => sub {
627 %state = (); 645 %state = ();
628 $cb->(undef, { @pseudo, Status => 599, Reason => "Unexpected end-of-file" }); 646 $cb->(undef, { @pseudo, Status => $ae_error, Reason => "Unexpected end-of-file" });
629 }, 647 },
630 ; 648 ;
631 649
632 # limit the number of persistent connections 650 # limit the number of persistent connections
633 # keepalive not yet supported 651 # keepalive not yet supported
641 659
642 $state{handle}->starttls ("connect") if $rscheme eq "https"; 660 $state{handle}->starttls ("connect") if $rscheme eq "https";
643 661
644 # handle actual, non-tunneled, request 662 # handle actual, non-tunneled, request
645 my $handle_actual_request = sub { 663 my $handle_actual_request = sub {
664 $ae_error = 596; # request phase
665
646 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls}; 666 $state{handle}->starttls ("connect") if $uscheme eq "https" && !exists $state{handle}{tls};
647 667
648 # send request 668 # send request
649 $state{handle}->push_write ( 669 $state{handle}->push_write (
650 "$method $rpath HTTP/1.1\015\012" 670 "$method $rpath HTTP/1.1\015\012"
661 # status line and headers 681 # status line and headers
662 $state{read_response} = sub { 682 $state{read_response} = sub {
663 for ("$_[1]") { 683 for ("$_[1]") {
664 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 684 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
665 685
666 /^HTTP\/([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/igxc 686 /^HTTP\/0*([0-9\.]+) \s+ ([0-9]{3}) (?: \s+ ([^\012]*) )? \012/gxci
667 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" })); 687 or return (%state = (), $cb->(undef, { @pseudo, Status => 599, Reason => "Invalid server response" }));
668 688
669 # 100 Continue handling 689 # 100 Continue handling
670 # should not happen as we don't send expect: 100-continue, 690 # should not happen as we don't send expect: 100-continue,
671 # but we handle it just in case. 691 # but we handle it just in case.
706 726
707 if ($recurse) { 727 if ($recurse) {
708 my $status = $hdr{Status}; 728 my $status = $hdr{Status};
709 729
710 # industry standard is to redirect POST as GET for 730 # industry standard is to redirect POST as GET for
711 # 301, 302 and 303, in contrast to http/1.0 and 1.1. 731 # 301, 302 and 303, in contrast to HTTP/1.0 and 1.1.
712 # also, the UA should ask the user for 301 and 307 and POST, 732 # also, the UA should ask the user for 301 and 307 and POST,
713 # industry standard seems to be to simply follow. 733 # industry standard seems to be to simply follow.
714 # we go with the industry standard. 734 # we go with the industry standard.
715 if ($status == 301 or $status == 302 or $status == 303) { 735 if ($status == 301 or $status == 302 or $status == 303) {
716 # HTTP/1.1 is unclear on how to mutate the method 736 # HTTP/1.1 is unclear on how to mutate the method
749 $cb); 769 $cb);
750 } else { 770 } else {
751 $cb->($_[0], \%hdr); 771 $cb->($_[0], \%hdr);
752 } 772 }
753 }; 773 };
774
775 $ae_error = 597; # body phase
754 776
755 my $len = $hdr{"content-length"}; 777 my $len = $hdr{"content-length"};
756 778
757 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) { 779 if (!$redirect && $arg{on_header} && !$arg{on_header}(\%hdr)) {
758 $finish->(undef, 598 => "Request cancelled by on_header"); 780 $finish->(undef, 598 => "Request cancelled by on_header");
780 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) { 802 } elsif ($hdr{"transfer-encoding"} =~ /\bchunked\b/i) {
781 my $cl = 0; 803 my $cl = 0;
782 my $body = undef; 804 my $body = undef;
783 my $on_body = $arg{on_body} || sub { $body .= shift; 1 }; 805 my $on_body = $arg{on_body} || sub { $body .= shift; 1 };
784 806
785 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
786
787 my $read_chunk; $read_chunk = sub { 807 my $read_chunk; $read_chunk = sub {
788 $_[1] =~ /^([0-9a-fA-F]+)/ 808 $_[1] =~ /^([0-9a-fA-F]+)/
789 or $finish->(undef, 599 => "Garbled chunked transfer encoding"); 809 or $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
790 810
791 my $len = hex $1; 811 my $len = hex $1;
792 812
793 if ($len) { 813 if ($len) {
794 $cl += $len; 814 $cl += $len;
797 $on_body->($_[1], \%hdr) 817 $on_body->($_[1], \%hdr)
798 or return $finish->(undef, 598 => "Request cancelled by on_body"); 818 or return $finish->(undef, 598 => "Request cancelled by on_body");
799 819
800 $_[0]->push_read (line => sub { 820 $_[0]->push_read (line => sub {
801 length $_[1] 821 length $_[1]
802 and return $finish->(undef, 599 => "Garbled chunked transfer encoding"); 822 and return $finish->(undef, $ae_error => "Garbled chunked transfer encoding");
803 $_[0]->push_read (line => $read_chunk); 823 $_[0]->push_read (line => $read_chunk);
804 }); 824 });
805 }); 825 });
806 } else { 826 } else {
807 $hdr{"content-length"} ||= $cl; 827 $hdr{"content-length"} ||= $cl;
810 if (length $_[1]) { 830 if (length $_[1]) {
811 for ("$_[1]") { 831 for ("$_[1]") {
812 y/\015//d; # weed out any \015, as they show up in the weirdest of places. 832 y/\015//d; # weed out any \015, as they show up in the weirdest of places.
813 833
814 my $hdr = parse_hdr 834 my $hdr = parse_hdr
815 or return $finish->(undef, 599 => "Garbled response trailers"); 835 or return $finish->(undef, $ae_error => "Garbled response trailers");
816 836
817 %hdr = (%hdr, %$hdr); 837 %hdr = (%hdr, %$hdr);
818 } 838 }
819 } 839 }
820 840
824 }; 844 };
825 845
826 $_[0]->push_read (line => $read_chunk); 846 $_[0]->push_read (line => $read_chunk);
827 847
828 } elsif ($arg{on_body}) { 848 } elsif ($arg{on_body}) {
829 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
830
831 if ($len) { 849 if ($len) {
832 $_[0]->on_read (sub { 850 $_[0]->on_read (sub {
833 $len -= length $_[0]{rbuf}; 851 $len -= length $_[0]{rbuf};
834 852
835 $arg{on_body}(delete $_[0]{rbuf}, \%hdr) 853 $arg{on_body}(delete $_[0]{rbuf}, \%hdr)
849 } 867 }
850 } else { 868 } else {
851 $_[0]->on_eof (undef); 869 $_[0]->on_eof (undef);
852 870
853 if ($len) { 871 if ($len) {
854 $_[0]->on_error (sub { $finish->(undef, 599 => $_[2]) });
855 $_[0]->on_read (sub { 872 $_[0]->on_read (sub {
856 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1) 873 $finish->((substr delete $_[0]{rbuf}, 0, $len, ""), undef, undef, 1)
857 if $len <= length $_[0]{rbuf}; 874 if $len <= length $_[0]{rbuf};
858 }); 875 });
859 } else { 876 } else {
860 $_[0]->on_error (sub { 877 $_[0]->on_error (sub {
861 ($! == Errno::EPIPE || !$!) 878 ($! == Errno::EPIPE || !$!)
862 ? $finish->(delete $_[0]{rbuf}) 879 ? $finish->(delete $_[0]{rbuf})
863 : $finish->(undef, 599 => $_[2]); 880 : $finish->(undef, $ae_error => $_[2]);
864 }); 881 });
865 $_[0]->on_read (sub { }); 882 $_[0]->on_read (sub { });
866 } 883 }
867 } 884 }
868 } 885 }
948Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP 965Takes a POSIX timestamp (seconds since the epoch) and formats it as a HTTP
949Date (RFC 2616). 966Date (RFC 2616).
950 967
951=item $timestamp = AnyEvent::HTTP::parse_date $date 968=item $timestamp = AnyEvent::HTTP::parse_date $date
952 969
953Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) and 970Takes a HTTP Date (RFC 2616) or a Cookie date (netscape cookie spec) or a
954returns the corresponding POSIX timestamp, or C<undef> if the date cannot 971bunch of minor variations of those, and returns the corresponding POSIX
955be parsed. 972timestamp, or C<undef> if the date cannot be parsed.
956 973
957=item $AnyEvent::HTTP::MAX_RECURSE 974=item $AnyEvent::HTTP::MAX_RECURSE
958 975
959The default value for the C<recurse> request parameter (default: C<10>). 976The default value for the C<recurse> request parameter (default: C<10>).
960 977
999sub parse_date($) { 1016sub parse_date($) {
1000 my ($date) = @_; 1017 my ($date) = @_;
1001 1018
1002 my ($d, $m, $y, $H, $M, $S); 1019 my ($d, $m, $y, $H, $M, $S);
1003 1020
1004 if ($date =~ /^[A-Z][a-z][a-z], ([0-9][0-9])[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1021 if ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)[\- ]([A-Z][a-z][a-z])[\- ]([0-9][0-9][0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1005 # RFC 822/1123, required by RFC 2616 (with " ") 1022 # RFC 822/1123, required by RFC 2616 (with " ")
1006 # cookie dates (with "-") 1023 # cookie dates (with "-")
1007 1024
1008 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6); 1025 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3, $4, $5, $6);
1009 1026
1010 } elsif ($date =~ /^[A-Z][a-z]+, ([0-9][0-9])-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) GMT$/) { 1027 } elsif ($date =~ /^[A-Z][a-z][a-z]+, ([0-9][0-9]?)-([A-Z][a-z][a-z])-([0-9][0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) GMT$/) {
1011 # RFC 850 1028 # RFC 850
1012 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6); 1029 ($d, $m, $y, $H, $M, $S) = ($1, $2, $3 < 69 ? $3 + 2000 : $3 + 1900, $4, $5, $6);
1013 1030
1014 } elsif ($date =~ /^[A-Z][a-z][a-z] ([A-Z][a-z][a-z]) ([0-9 ][0-9]) ([0-9][0-9]):([0-9][0-9]):([0-9][0-9]) ([0-9][0-9][0-9][0-9])$/) { 1031 } elsif ($date =~ /^[A-Z][a-z][a-z]+ ([A-Z][a-z][a-z]) ([0-9 ]?[0-9]) ([0-9][0-9]?):([0-9][0-9]?):([0-9][0-9]?) ([0-9][0-9][0-9][0-9])$/) {
1015 # ISO C's asctime 1032 # ISO C's asctime
1016 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5); 1033 ($d, $m, $y, $H, $M, $S) = ($2, $1, $6, $3, $4, $5);
1017 } 1034 }
1018 # other formats fail in the loop below 1035 # other formats fail in the loop below
1019 1036

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines